From ea9b00ce56bdc78db6e5b46af9b6bbebf74ad138 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 20 May 2021 15:31:42 +0200 Subject: [PATCH 001/287] adjusted test --- .../dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 +- .../dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index cc18c6f54..1a6ebb9e8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -32,7 +32,7 @@ public class EXCELParser { XSSFSheet sheet = wb.getSheet(sheetName); - if(sheetName == null){ + if (sheetName == null) { throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index 59b536cd5..72ba48f41 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -35,8 +35,9 @@ public class EXCELParserTest { EXCELParser excelParser = new EXCELParser(); - List pl = excelParser - .parse(httpConnector.getInputSourceAsStream(URL), "eu.dnetlib.dhp.actionmanager.project.utils.ExcelTopic"); + final String classForName = "eu.dnetlib.dhp.actionmanager.project.utils.ExcelTopic"; + final String sheetName = "Topics"; + List pl = excelParser.parse(httpConnector.getInputSourceAsStream(URL), classForName, sheetName); Assertions.assertEquals(3837, pl.size()); From 4d6c473bf18a42a1e0fac7863eddae017d1a7eff Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 20 May 2021 18:26:42 +0200 Subject: [PATCH 002/287] removed redundant classes contained now in dhp-schema --- .../dhp/schema/orcid/AuthorHistory.java | 79 ------------- .../dhp/schema/orcid/AuthorSummary.java | 25 ---- .../dnetlib/dhp/schema/orcid/Contributor.java | 58 ---------- .../dnetlib/dhp/schema/orcid/ExternalId.java | 38 ------ .../dnetlib/dhp/schema/orcid/OrcidData.java | 34 ------ .../dhp/schema/orcid/PublicationDate.java | 38 ------ .../eu/dnetlib/dhp/schema/orcid/Work.java | 16 --- .../dnetlib/dhp/schema/orcid/WorkDetail.java | 109 ------------------ 8 files changed, 397 deletions(-) delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java deleted file mode 100644 index 554aae82c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorHistory.java +++ /dev/null @@ -1,79 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -public class AuthorHistory implements Serializable { - private String creationMethod; - private String completionDate; - private String submissionDate; - private String lastModifiedDate; - private boolean claimed; - private String deactivationDate; - private boolean verifiedEmail; - private boolean verifiedPrimaryEmail; - - public String getCreationMethod() { - return creationMethod; - } - - public void setCreationMethod(String creationMethod) { - this.creationMethod = creationMethod; - } - - public String getCompletionDate() { - return completionDate; - } - - public void setCompletionDate(String completionDate) { - this.completionDate = completionDate; - } - - public String getSubmissionDate() { - return submissionDate; - } - - public void setSubmissionDate(String submissionDate) { - this.submissionDate = submissionDate; - } - - public String getLastModifiedDate() { - return lastModifiedDate; - } - - public void setLastModifiedDate(String lastModifiedDate) { - this.lastModifiedDate = lastModifiedDate; - } - - public boolean isClaimed() { - return claimed; - } - - public void setClaimed(boolean claimed) { - this.claimed = claimed; - } - - public String getDeactivationDate() { - return deactivationDate; - } - - public void setDeactivationDate(String deactivationDate) { - this.deactivationDate = deactivationDate; - } - - public boolean isVerifiedEmail() { - return verifiedEmail; - } - - public void setVerifiedEmail(boolean verifiedEmail) { - this.verifiedEmail = verifiedEmail; - } - - public boolean isVerifiedPrimaryEmail() { - return verifiedPrimaryEmail; - } - - public void setVerifiedPrimaryEmail(boolean verifiedPrimaryEmail) { - this.verifiedPrimaryEmail = verifiedPrimaryEmail; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java deleted file mode 100644 index 813aead49..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorSummary.java +++ /dev/null @@ -1,25 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -public class AuthorSummary extends OrcidData implements Serializable { - AuthorData authorData; - AuthorHistory authorHistory; - - public AuthorData getAuthorData() { - return authorData; - } - - public void setAuthorData(AuthorData authorData) { - this.authorData = authorData; - } - - public AuthorHistory getAuthorHistory() { - return authorHistory; - } - - public void setAuthorHistory(AuthorHistory authorHistory) { - this.authorHistory = authorHistory; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java deleted file mode 100644 index 3b543db4b..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Contributor.java +++ /dev/null @@ -1,58 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -import eu.dnetlib.dhp.schema.orcid.AuthorData; - -/** - * This class models the data related to a contributor, that are retrieved from an orcid publication - */ - -public class Contributor extends AuthorData implements Serializable { - private String sequence; - private String role; - private transient boolean simpleMatch; - private transient Double score; - private transient boolean bestMatch; - - public String getSequence() { - return sequence; - } - - public void setSequence(String sequence) { - this.sequence = sequence; - } - - public String getRole() { - return role; - } - - public void setRole(String role) { - this.role = role; - } - - public boolean isSimpleMatch() { - return simpleMatch; - } - - public void setSimpleMatch(boolean simpleMatch) { - this.simpleMatch = simpleMatch; - } - - public Double getScore() { - return score; - } - - public void setScore(Double score) { - this.score = score; - } - - public boolean isBestMatch() { - return bestMatch; - } - - public void setBestMatch(boolean bestMatch) { - this.bestMatch = bestMatch; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java deleted file mode 100644 index d8f001aa5..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/ExternalId.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -/** - * This class models the data related to external id, that are retrieved from an orcid publication - */ - -public class ExternalId implements Serializable { - private String type; - private String value; - private String relationShip; - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public String getRelationShip() { - return relationShip; - } - - public void setRelationShip(String relationShip) { - this.relationShip = relationShip; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java deleted file mode 100644 index 606eea6a8..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidData.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -public class OrcidData implements Serializable { - protected String base64CompressData; - protected String statusCode; - protected String downloadDate; - - public String getBase64CompressData() { - return base64CompressData; - } - - public void setBase64CompressData(String base64CompressData) { - this.base64CompressData = base64CompressData; - } - - public String getStatusCode() { - return statusCode; - } - - public void setStatusCode(String statusCode) { - this.statusCode = statusCode; - } - - public String getDownloadDate() { - return downloadDate; - } - - public void setDownloadDate(String downloadDate) { - this.downloadDate = downloadDate; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java deleted file mode 100644 index 01972ce95..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/PublicationDate.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -/** - * This class models the data related to a publication date, that are retrieved from an orcid publication - */ - -public class PublicationDate implements Serializable { - private String year; - private String month; - private String day; - - public String getYear() { - return year; - } - - public void setYear(String year) { - this.year = year; - } - - public String getMonth() { - return month; - } - - public void setMonth(String month) { - this.month = month; - } - - public String getDay() { - return day; - } - - public void setDay(String day) { - this.day = day; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java deleted file mode 100644 index c557eb5d2..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/Work.java +++ /dev/null @@ -1,16 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; - -public class Work extends OrcidData implements Serializable { - WorkDetail workDetail; - - public WorkDetail getWorkDetail() { - return workDetail; - } - - public void setWorkDetail(WorkDetail workDetail) { - this.workDetail = workDetail; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java deleted file mode 100644 index 614d415c1..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/WorkDetail.java +++ /dev/null @@ -1,109 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.orcid.Contributor; -import eu.dnetlib.dhp.schema.orcid.ExternalId; -import eu.dnetlib.dhp.schema.orcid.OrcidData; -import eu.dnetlib.dhp.schema.orcid.PublicationDate; - -/** - * This class models the data that are retrieved from orcid publication - */ - -public class WorkDetail implements Serializable { - - private String oid; - private String id; - private String sourceName; - private String type; - private List titles; - private List urls; - List extIds; - List publicationDates; - List contributors; - - public String getOid() { - return oid; - } - - public void setOid(String oid) { - this.oid = oid; - } - - public String getErrorCode() { - return errorCode; - } - - public void setErrorCode(String errorCode) { - this.errorCode = errorCode; - } - - private String errorCode; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getTitles() { - return titles; - } - - public void setTitles(List titles) { - this.titles = titles; - } - - public String getSourceName() { - return sourceName; - } - - public void setSourceName(String sourceName) { - this.sourceName = sourceName; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public List getUrls() { - return urls; - } - - public void setUrls(List urls) { - this.urls = urls; - } - - public List getExtIds() { - return extIds; - } - - public void setExtIds(List extIds) { - this.extIds = extIds; - } - - public List getPublicationDates() { - return publicationDates; - } - - public void setPublicationDates(List publicationDates) { - this.publicationDates = publicationDates; - } - - public List getContributors() { - return contributors; - } - - public void setContributors(List contributors) { - this.contributors = contributors; - } -} From ae7bd24d79ccd30d978f080da3b623f5090a8d5a Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 20 May 2021 18:32:22 +0200 Subject: [PATCH 003/287] removed old workflows --- .../oozie_app/config-default.xml | 31 - .../orcid_activities/oozie_app/workflow.xml | 542 ------------------ .../oozie_app/config-default.xml | 22 - .../oozie_app/workflow.xml | 505 ---------------- .../oozie_app/workflow.xml | 99 ---- .../oozie_app/workflow.xml | 232 -------- .../oozie_app/config-default.xml | 26 - .../oozie_app/workflow.xml | 40 -- .../oozie_app/config-default.xml | 26 - .../orcid_summaries/oozie_app/workflow.xml | 68 --- 10 files changed, 1591 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml deleted file mode 100644 index 05fe6d014..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - oozie.action.sharelib.for.java - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx2g - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml deleted file mode 100644 index ea4d33296..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml +++ /dev/null @@ -1,542 +0,0 @@ - - - - workingPath - the working dir base path - - - shell_cmd_0 - wget -O /tmp/ORCID_2020_10_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/25002232 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_0.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_0.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_0.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 0 - - - shell_cmd_1 - wget -O /tmp/ORCID_2020_10_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/25002088 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_1.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_1.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_1.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 1 - - - shell_cmd_2 - wget -O /tmp/ORCID_2020_10_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/25000596 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_2.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_2.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_2.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 2 - - - shell_cmd_3 - wget -O /tmp/ORCID_2020_10_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/25015150 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_3.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_3.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_3.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 3 - - - shell_cmd_4 - wget -O /tmp/ORCID_2020_10_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/25033643 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_4.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_4.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_4.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 4 - - - shell_cmd_5 - wget -O /tmp/ORCID_2020_10_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/25005483 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_5.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_5.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_5.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 5 - - - shell_cmd_6 - wget -O /tmp/ORCID_2020_10_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/25005425 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_6.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_6.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_6.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 6 - - - shell_cmd_7 - wget -O /tmp/ORCID_2020_10_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/25012016 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_7.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_7.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_7.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 7 - - - shell_cmd_8 - wget -O /tmp/ORCID_2020_10_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/25012079 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_8.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_8.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_8.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 8 - - - shell_cmd_9 - wget -O /tmp/ORCID_2020_10_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/25010727 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_9.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_9.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_9.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 9 - - - shell_cmd_X - wget -O /tmp/ORCID_2020_10_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/25011025 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_X.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_X.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_X.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file X - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_0.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_0} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_0.tar.gz - -owno_doi_works/works_0.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_1.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_1} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_1.tar.gz - -owno_doi_works/works_1.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_2.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_2} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_2.tar.gz - -owno_doi_works/works_2.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_3.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_3} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_3.tar.gz - -owno_doi_works/works_3.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_4.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_4} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_4.tar.gz - -owno_doi_works/works_4.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_5.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_5} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_5.tar.gz - -owno_doi_works/works_5.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_6.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_6} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_6.tar.gz - -owno_doi_works/works_6.seq - -oewno_doi_enriched_works/ - - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_7.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_7} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_7.tar.gz - -owno_doi_works/works_7.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_8.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_8} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_8.tar.gz - -owno_doi_works/works_8.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_9.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_9} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_9.tar.gz - -owno_doi_works/works_9.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_X.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_X} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_X.tar.gz - -owno_doi_works/works_X.seq - -oewno_doi_enriched_works/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/config-default.xml deleted file mode 100644 index 5621415d9..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/config-default.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.action.sharelib.for.java - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx4g - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml deleted file mode 100644 index 1c2ae89dd..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_authors_dois_data/oozie_app/workflow.xml +++ /dev/null @@ -1,505 +0,0 @@ - - - - workingPath_activities - the working dir base path - - - shell_cmd_0 - wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 0 - - - shell_cmd_1 - wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 1 - - - shell_cmd_2 - wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 2 - - - shell_cmd_3 - wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 3 - - - shell_cmd_4 - wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 4 - - - shell_cmd_5 - wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 5 - - - shell_cmd_6 - wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 6 - - - shell_cmd_7 - wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 7 - - - shell_cmd_8 - wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 8 - - - shell_cmd_9 - wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 9 - - - shell_cmd_X - wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file X - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_0.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_0} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_0.tar.gz - -ooutput/authors_dois_0.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_1.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_1} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_1.tar.gz - -ooutput/authors_dois_1.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_2.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_2} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_2.tar.gz - -ooutput/authors_dois_2.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_3.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_3} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_3.tar.gz - -ooutput/authors_dois_3.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_4.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_4} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_4.tar.gz - -ooutput/authors_dois_4.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_5.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_5} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_5.tar.gz - -ooutput/authors_dois_5.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_6.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_6} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_6.tar.gz - -ooutput/authors_dois_6.seq - - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_7.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_7} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_7.tar.gz - -ooutput/authors_dois_7.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_8.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_8} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_8.tar.gz - -ooutput/authors_dois_8.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_9.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_9} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_9.tar.gz - -ooutput/authors_dois_9.seq - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_X.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_X} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen - -d${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_X.tar.gz - -ooutput/authors_dois_X.seq - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml deleted file mode 100644 index 133a6f4bd..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_doi_author_list/oozie_app/workflow.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - spark2MaxExecutors - 20 - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - workingPath - the working dir base path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - yarn-cluster - cluster - GenDoiAuthorList - eu.dnetlib.doiboost.orcid.SparkGenerateDoiAuthorList - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - -w${workingPath}/ - -aauthors/authors.seq - -xwxml/works/*.seq - -odoi_author_list/ - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml deleted file mode 100644 index 6f629c754..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml +++ /dev/null @@ -1,232 +0,0 @@ - - - - workingPath - the working dir base path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.java - ${oozieActionShareLibForSpark2} - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx2g - - - oozie.use.system.libpath - true - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_0.tar.gz - -owxml/works/xml_works_0.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_1.tar.gz - -owxml/works/xml_works_1.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_2.tar.gz - -owxml/works/xml_works_2.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_3.tar.gz - -owxml/works/xml_works_3.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_4.tar.gz - -owxml/works/xml_works_4.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_5.tar.gz - -owxml/works/xml_works_5.seq - -oew--- - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_6.tar.gz - -owxml/works/xml_works_6.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_7.tar.gz - -owxml/works/xml_works_7.seq - -oew--- - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_8.tar.gz - -owxml/works/xml_works_8.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_9.tar.gz - -owxml/works/xml_works_9.seq - -oew--- - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_activites_X.tar.gz - -owxml/works/xml_works_X.seq - -oew--- - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml deleted file mode 100644 index 191654378..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx8g - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml deleted file mode 100644 index 68d468ab3..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - - workingPath - the working dir base path - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.ExtractXMLSummariesData - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_summaries.tar.gz - -oxml/authors/ - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml deleted file mode 100644 index 191654378..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx8g - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml deleted file mode 100644 index 8517f35ee..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - workingPath - the working dir base path - - - shell_cmd_0 - wget -O /tmp/ORCID_2020_10_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/25032905 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_summaries.tar.gz /data/orcid_activities_2020/ORCID_2020_10_summaries.tar.gz ; rm -f /tmp/ORCID_2020_10_summaries.tar.gz - - the shell command that downloads and puts to hdfs orcid summaries - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2020_10_summaries.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_0} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidDSManager - -w${workingPath}/ - -n${nameNode} - -fORCID_2020_10_summaries.tar.gz - -oauthors/ - - - - - - - \ No newline at end of file From 1265dadc90686ad9a687a8bf7e20fd739d663ae9 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 20 May 2021 19:01:28 +0200 Subject: [PATCH 004/287] workflow aligned with stable_ids --- .../orcid/SparkDownloadOrcidAuthors.java | 2 +- .../orcidnodoi/oaf/PublicationToOaf.java | 4 +- .../oozie_app/workflow.xml | 42 ------------------- .../orcidnodoi/oozie_app/workflow.xml | 31 ++++++++++---- 4 files changed, 27 insertions(+), 52 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 36b4b073d..8cf070213 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -62,7 +62,7 @@ public class SparkDownloadOrcidAuthors { isSparkSessionManaged, spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); - logger.info("lastUpdate: ", lastUpdate); + logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { throw new RuntimeException("last update info not found"); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 5c3236222..ccae4d976 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -174,7 +174,9 @@ public class PublicationToOaf implements Serializable { publication .getExternalReference() .add( - convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); + convertExtRef( + extId, classid, classname, ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES)); } }); diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml deleted file mode 100644 index becdf0974..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - inputPath - /data/orcid_activities_2020/no_doi_dataset - path where retrieve the already generated action set - - - outputPath - /data/orcid_activities_2020/test_import_orcid_no_doi - path where to store the action set - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${inputPath}/* - ${outputPath} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 6513ff7e1..365c4d5b4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -1,5 +1,15 @@ + + workingPath + /data/orcid_activities_2020 + path where the collection workflow stores the ORCID data + + + outputPath + path where to store the action set + + spark2GenNoDoiDatasetMaxExecutors 40 @@ -35,10 +45,6 @@ spark2EventLogDir spark 2.* event log dir location - - workingPath - the working dir base path - @@ -83,11 +89,20 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - -w${workingPath}/ - -n${nameNode} - -ilast_orcid_dataset - -oewno_doi_dataset + --workingPath${workingPath}/ + --hdfsServerUri${nameNode} + --orcidDataFolderlast_orcid_dataset + --outputEnrichedWorksPathno_doi_dataset + + + + + + + ${workingPath}/no_doi_dataset/* + ${outputPath} + From d0945c3c7817d7081b6fb214b6ec7ee227e3a396 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 20 May 2021 19:14:31 +0200 Subject: [PATCH 005/287] added temporary output folder, because of folder access rights are different on beta and prod --- .../dhp/doiboost/orcidnodoi/oozie_app/workflow.xml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 365c4d5b4..68f370a20 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -9,7 +9,10 @@ outputPath path where to store the action set - + + processOutputFolder + temporary path where to store the action set + spark2GenNoDoiDatasetMaxExecutors 40 @@ -66,7 +69,7 @@ - + @@ -92,7 +95,7 @@ --workingPath${workingPath}/ --hdfsServerUri${nameNode} --orcidDataFolderlast_orcid_dataset - --outputEnrichedWorksPathno_doi_dataset + --outputEnrichedWorksPath${processOutputFolder} @@ -100,7 +103,7 @@ - ${workingPath}/no_doi_dataset/* + ${workingPath}/${processOutputFolder}/* ${outputPath} From abdd0ade1fd154cbacedfea928788f600b3ba4a7 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 21 May 2021 12:08:16 +0200 Subject: [PATCH 006/287] added temporary output folder as workflow parameter --- .../eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 68f370a20..f7ac04821 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -7,10 +7,12 @@ outputPath + /data/orcid_activities_2020/no_doi_dataset_prod/ path where to store the action set processOutputFolder + process_no_doi_dataset_prod temporary path where to store the action set From 783988af06de0efd44ef7b5f0d5578bda7ed9ed9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 11:17:25 +0200 Subject: [PATCH 007/287] depending on dhp-schemas:2.6.14 (release) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 45e230c4f..9396eeeb6 100644 --- a/pom.xml +++ b/pom.xml @@ -736,7 +736,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.6.14] [4.0.3] [6.0.5] [3.1.6] From 28a66af425a3128d6e3922831a49c954ccc729d6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 11:52:24 +0200 Subject: [PATCH 008/287] updated URL in the issueManagement tag --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9396eeeb6..19d00d637 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ Redmine - https://issue.openaire.research-infrastructures.eu/projects/openaire + https://support.openaire.eu/projects/openaire From e0061232e96b8f3b4d20781ba794484366438cb4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 13:41:21 +0200 Subject: [PATCH 009/287] [aggregation] datacite wf: conditional creation of links, optional resume from intermediate phases --- .../datacite/oozie_app/workflow.xml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 036178b37..3e0c2bba6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -13,15 +13,26 @@ 100 The request block size + + exportLinks + false + instructs the transformation phase to produce the links or not + - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('resumeFrom') eq 'TransformDatacite'} + + + @@ -49,8 +60,7 @@ - - + yarn-cluster cluster @@ -70,7 +80,7 @@ --sourcePath${mainPath}/datacite_dump --targetPath${mainPath}/datacite_oaf --isLookupUrl${isLookupUrl} - --exportLinkstrue + --exportLinks${exportLinks} --masteryarn-cluster From f5486ffb1413031c28c22b9453c9b376a4aaff00 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:07:45 +0200 Subject: [PATCH 010/287] Fixed issues to tests --- .../eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index 7628fb853..d620c4ef3 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -61,7 +61,7 @@ class MappingORCIDToOAFTest { assertTrue(oA == p.count()) println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p.first())) - + spark.close() } @@ -78,7 +78,7 @@ class MappingORCIDToOAFTest { val oaf = ORCIDToOAF.convertTOOAF(orcid) assert(oaf.getPid.size() == 1) oaf.getPid.toList.foreach(pid => assert(pid.getQualifier.getClassid.equals("doi"))) - oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876".toLowerCase()))) + oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876"))) //println(mapper.writeValueAsString(ORCIDToOAF.convertTOOAF(orcid))) From 2f66fedfec8f393bb5b9b299ae32459c13f856a2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:22:23 +0200 Subject: [PATCH 011/287] changed the logic to verify if a community is contained in the list of context of a result --- .../dhp/oa/graph/dump/community/CommunitySplit.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 55f075e95..9e071a356 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -57,16 +57,13 @@ public class CommunitySplit implements Serializable { Dataset community_products = result .filter((FilterFunction) r -> containsCommunity(r, c)); - try { - community_products.first(); + community_products .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) .json(outputPath + "/" + c); - } catch (Exception e) { - } } @@ -75,9 +72,9 @@ public class CommunitySplit implements Serializable { return r .getContext() .stream() - .filter(con -> con.getCode().equals(c)) - .collect(Collectors.toList()) - .size() > 0; + .map(con -> con.getCode()) + .collect(Collectors.toList()) + .contains(c) } return false; } From da88c850c61899e02278fa40b4a9a6fd7d5dacbb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:22:44 +0200 Subject: [PATCH 012/287] changed the logic to verify if a community is contained in the list of context of a result --- .../eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 9e071a356..088ca1eaa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -74,7 +74,7 @@ public class CommunitySplit implements Serializable { .stream() .map(con -> con.getCode()) .collect(Collectors.toList()) - .contains(c) + .contains(c); } return false; } From 86e50f7311f9b914a01c0bbd1eaee54b925225ad Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:31:45 +0200 Subject: [PATCH 013/287] modified code to split the Croatian funder --- .../funderresults/SparkDumpFunderResults.java | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 00f604b14..261fb68bb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -8,6 +8,7 @@ import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; import org.slf4j.Logger; @@ -102,29 +103,49 @@ public class SparkDumpFunderResults implements Serializable { } else { funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase(); } - writeFunderResult(funder, result, outputPath + "/" + funderdump); + writeFunderResult(funder, result, outputPath , funderdump); }); } - private static void writeFunderResult(String funder, Dataset results, String outputPath) { + private static void dumpResults(String nsp, Dataset results, String outputPath, String funderName, String funderDump) { results.map((MapFunction) r -> { if (!Optional.ofNullable(r.getProjects()).isPresent()) { return null; } for (Project p : r.getProjects()) { - if (p.getId().startsWith(funder)) { + if (p.getId().startsWith(nsp)) { + + if (nsp.equals("40|irb")) { + if (p.getFunder().getShortName().equals(funderName)) + return r; + else + return null; + } return r; } } return null; }, Encoders.bean(CommunityResult.class)) - .filter(Objects::nonNull) + .filter((FilterFunction) r -> r!= null) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath); + .json(outputPath + "/" + funderDump); } + + private static void writeFunderResult(String funder, Dataset results, String outputPath, + String funderDump) { + + if (funder.equals("40|irb")) { + dumpResults(funder, results, outputPath, "CSF", "HRZZ"); + dumpResults(funder, results, outputPath, "MSES", "MZOS"); + } else + dumpResults(funder, results, outputPath, funderDump, null); + + } + + } From 69fd40fd308a7d9806c7ef801c57782ba91c0d8d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:35:26 +0200 Subject: [PATCH 014/287] modified code to split the Croatian funder --- .../dump/funderresults/SparkDumpFunderResults.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 261fb68bb..b124b76ce 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -108,7 +108,7 @@ public class SparkDumpFunderResults implements Serializable { } - private static void dumpResults(String nsp, Dataset results, String outputPath, String funderName, String funderDump) { + private static void dumpResults(String nsp, Dataset results, String outputPath, String funderName) { results.map((MapFunction) r -> { if (!Optional.ofNullable(r.getProjects()).isPresent()) { @@ -117,7 +117,7 @@ public class SparkDumpFunderResults implements Serializable { for (Project p : r.getProjects()) { if (p.getId().startsWith(nsp)) { - if (nsp.equals("40|irb")) { + if (nsp.startsWith("40|irb")) { if (p.getFunder().getShortName().equals(funderName)) return r; else @@ -132,18 +132,18 @@ public class SparkDumpFunderResults implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "/" + funderDump); + .json(outputPath + "/" + funderName); } private static void writeFunderResult(String funder, Dataset results, String outputPath, String funderDump) { - if (funder.equals("40|irb")) { - dumpResults(funder, results, outputPath, "CSF", "HRZZ"); - dumpResults(funder, results, outputPath, "MSES", "MZOS"); + if (funder.startsWith("40|irb")) { + dumpResults(funder, results, outputPath, "HRZZ"); + dumpResults(funder, results, outputPath, "MZOS"); } else - dumpResults(funder, results, outputPath, funderDump, null); + dumpResults(funder, results, outputPath, funderDump); } From 87a6e2b967bd27a1e3a226b18e315b7a203a8bbc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:38:28 +0200 Subject: [PATCH 015/287] extended test class --- .../dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 71bf5d942..eefb0b9cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -137,5 +137,11 @@ public class SplitPerFunderTest { .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); Assertions.assertEquals(3, tmp.count()); + // MZOS 1 + tmp = sc + .textFile(workingDir.toString() + "/split/MZOS") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); + } } From 4b432fbee8293f57afa3bc75d4d7416315bafcc0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:40:39 +0200 Subject: [PATCH 016/287] extended test class --- .../dhp/oa/graph/dump/funderresource/extendeddump/relation | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation index 3ed91f451..f88678437 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation @@ -5,4 +5,5 @@ {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"40|snsf________::50cb15ff7a6a3f8531f063770179e346"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"} \ No newline at end of file +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|irb_hr______::1e5e62235d094afd01cd56e65112fc63"} \ No newline at end of file From 65a242646d8a45e533bee04d5f0db54be5dd49bb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 14:45:25 +0200 Subject: [PATCH 017/287] added resource for APC dump --- .../oa/graph/dump/resultDump/publication_pca | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca new file mode 100644 index 000000000..e15d84528 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca @@ -0,0 +1,25 @@ +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::0224aae28af558f21768dbc6439c7a95","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"8250"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount": {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1396"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::26780065282e607306372abd0d808245","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"van Someren, Christian","name":"Christian","pid":[],"rank":1,"surname":"van Someren"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:32.27Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over het Energieakkoord. In het energieakkoord voor duurzame groei is afgesproken dat in 2020 14 procent van de opwek hernieuwbaar moet zijn en in 2023 16 procent. De doelstelling is een uitdagende opgave waarbij de eerste vraag is: \"Hoeveel hernieuwbare energie wordt er op dit moment opgewekt in Nederland?\" Deze website geeft antwoord op de vraag voor de actueel opgewekte windenergie, zonne-energie en biogas."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["http://energieopwek.nl/"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282663379,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-08T17:46:55Z","harvestDate":"2020-05-25T15:30:24.079Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/hbo:oai:hbokennisbank.nl:hanzepure:oai:research.hanze.nl:publications/813a5dfa-4fd0-44c4-8cbf-310324dc724d","metadataNamespace":""}},"originalId":["DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0038","classname":"0038","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"energieproductie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Policy and Law"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Energie interventies en gedrag"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"publieke ondersteuning en communicatie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Professional practice & society"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Energie opwek"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:40.568Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282670417,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:41:34Z","harvestDate":"2020-05-25T11:40:05.974Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550052278","metadataNamespace":""}},"originalId":["DansKnawCris::3c9f068ddc930360bec6925488a9a97f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:11:57.737Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282758835,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:36:00Z","harvestDate":"2020-05-25T11:30:47.199Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800007467","metadataNamespace":""}},"originalId":["DansKnawCris::4669a378a73661417182c208e6fdab53"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Vlaardingen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Het Hof en Oranjepark : gemeente Vlaardingen : archeologisch vooronderzoek: een inventariserend veldonderzoek (verkennende fase)"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"RAAP-rapport, ISSN 0925-6229 2089"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:18:18.049Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met literatuuropgave"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283010899,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:48:12Z","harvestDate":"2020-05-25T11:32:46.363Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800014509","metadataNamespace":""}},"originalId":["DansKnawCris::4a9152e80f860eab99072e921d74a0ff"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proefsleuvenonderzoek aan de Bredasebaan 8 te Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archeodienst rapport, ISSN 1877-2900 565"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:31:28.838Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::52c4541c9bffde34daa945ece8dcf635","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283000091,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:52:57Z","harvestDate":"2020-05-25T11:33:31.012Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013915","metadataNamespace":""}},"originalId":["DansKnawCris::52c4541c9bffde34daa945ece8dcf635"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013915"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013915"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Honselersdijk tracé persleiding (gemeente Westland) : een bureauonderzoek en inventariserend veldonderzoek in de vorm van een verkennend en karterend booronderzoek"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ADC-rapport 995"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:32:32.283Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::53842d77ea8c021a3ad5b401a8c7458b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282538754,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:28:51Z","harvestDate":"2020-05-25T11:38:50.591Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550039885","metadataNamespace":""}},"originalId":["DansKnawCris::53842d77ea8c021a3ad5b401a8c7458b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550039885"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550039885"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T19:32:26.285Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282847497,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:14:04Z","harvestDate":"2020-05-25T11:36:34.251Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550028404","metadataNamespace":""}},"originalId":["DansKnawCris::794d07c2e66f1fbf07d61b9bfca36dc2"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550028404"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550028404"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archeologisch onderzoek plangebied Akker-Boekenderweg te Thorn"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Grontmij Archeologische Rapporten 1005"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T19:35:29.875Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::7b844ee57dcf7d57148ab8ef6dc5ff88","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283003396,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:27:10Z","harvestDate":"2020-05-25T11:29:19.356Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800002012","metadataNamespace":""}},"originalId":["DansKnawCris::7b844ee57dcf7d57148ab8ef6dc5ff88"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800002012"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800002012"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Abcoude"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archeologisch onderzoek tracé drukriolering Abcoude : bureauonderzoek"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Grontmij archeologische rapporten, ISSN 1573-5710 261"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Zwieten, van, P.A.M.","name":"van, P.A.M.","pid":[],"rank":1,"surname":"Zwieten"},{"affiliation":[],"fullname":"Banda, M.","name":"M.","pid":[],"rank":2,"surname":"Banda"},{"affiliation":[],"fullname":"Kolding, J.","name":"J.","pid":[],"rank":3,"surname":"Kolding"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T21:08:20.946Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The provision of management information on the fisheries of Lakes Malawi and Malombe has been characterised by top–down controlled single species steady-state assessment techniques originating from single gear industrial fisheries but applied to an open access highly diverse and adaptive small-scale multispecies and multi-gear fishery. The result has largely been an unhappy marriage with uncertainties blamed more on the data than the process, although the data collection generally is detailed and comprehensive on catch and effort parameters. An extensive literature review of primary and grey literature on ecosystem drivers, exploitation pressures, and fish population and community states shows that Malawi has the necessary knowledge base for expanding their assessment into multi-causal and exploratory indicator-based methods that can assist in better understanding and more disciplined use of existing data and monitoring systems. Selection and ranking of a suite of indicators focusing on the major fisheries in the Southeast arm of Lake Malawi and Lake Malombe were done by a group of Malawian fisheries researchers and management advisers, thereby testing a framework of scoring criteria assessing an indicator's acceptability, observability, and relatedness to management. Indicators that are close to raw observational data and that require limited permutations and few assumptions appear to be preferable in the Malawian context. CPUE-based assessments can improve the utility of data and information in communicating developments and processes and evaluate fisheries management policies"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::b90247718304c409331edb82fd0e8d56","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"en","classname":"en","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282621858,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-10T00:13:05Z","harvestDate":"2020-05-25T15:41:26.786Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/wur:oai:library.wur.nl:wurpubs/401847","metadataNamespace":""}},"originalId":["DansKnawCris::b90247718304c409331edb82fd0e8d56"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:32-401847"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:32-401847"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"african great-lakes"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"cichlid fishes"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"reference points"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"east-africa"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"perspective"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"tanganyika"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"diversity"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"history"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"nyasa"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Selecting indicators to assess the fisheries of Lake Malawi and Lake Malombe: Knowledge base and evaluative capacity"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"44","iss":"1","issnLinking":"","issnOnline":"","issnPrinted":"0380-1330","name":"Journal of Great Lakes Research","sp":"26","vol":"37"}} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T21:11:18.301Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::ba8f1ea3adf1bba501ec9da3a58e9638","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282723014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:03:16Z","harvestDate":"2020-05-25T11:35:22.74Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550019564","metadataNamespace":""}},"originalId":["DansKnawCris::ba8f1ea3adf1bba501ec9da3a58e9638"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550019564"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550019564"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bureauonderzoek"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Clavecymbelstraat te Maastricht"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra BV/Verhoeve Groep Rapportage 176146"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T21:31:42.662Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"110402/WA6/3O1/001074.001."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::c96a83dad021ecfc88d256a9622f30b3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282599877,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:42:36Z","harvestDate":"2020-05-25T11:31:54.779Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550009089","metadataNamespace":""}},"originalId":["DansKnawCris::c96a83dad021ecfc88d256a9622f30b3"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550009089"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550009089"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archeologisch bureau- en inventariserend veldonderzoek kadeverbetering Diemen westzijde"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::437f4b072b1aa198adcbc35910ff3b98","value":"CORE"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-02-27T18:44:17.894Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|core________::4f6d6cdddfac6cabae934a8a0d30b8c5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::437f4b072b1aa198adcbc35910ff3b98","value":"CORE"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::8f87e10869299a5fe80b315695296b88","value":"Elsevier"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.1016/S2214-6873(14)00065-X"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282750272,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https://dev-openaire.d4science.org/RS/Elsevier/data/elsevier/metadata/","datestamp":"","harvestDate":"2018-11-10T01:46:09.648Z","identifier":"","metadataNamespace":""}},"originalId":["core________::4f6d6cdddfac6cabae934a8a0d30b8c5"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/S2214-6873(14)00065-X"}],"publisher":null,"relevantdate":[],"resourcetype":null,"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8739"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"humanities"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7884"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"technology, industry, and agriculture"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7803"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"human activities"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7461"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"education"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8397"},"qualifier":{"classid":"acm","classname":"acm","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"ComputingMilieux_THECOMPUTINGPROFESSION"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7542"},"qualifier":{"classid":"acm","classname":"acm","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"GeneralLiterature_MISCELLANEOUS"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"General Information"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"4","issnLinking":"","issnOnline":"","issnPrinted":"22146873","name":"Asia-Pacific Journal of Sports Medicine, Arthroscopy, Rehabilitation and Technology","sp":"iii","vol":""}} +{"author":[{"affiliation":[],"fullname":"Wegwarth, Odette","name":"Odette","pid":[],"rank":1,"surname":"Wegwarth"},{"affiliation":[],"fullname":"Pashayan, Nora","name":"Nora","pid":[],"rank":2,"surname":"Pashayan"},{"affiliation":[],"fullname":"Widschwendter, Martin","name":"Martin","pid":[],"rank":3,"surname":"Widschwendter"},{"affiliation":[],"fullname":"Rebitschek, Felix","name":"Felix","pid":[],"rank":4,"surname":"Rebitschek"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"STROBE checklist. (DOC 98 kb)"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::016dfd6ecbfaa929bbd90ec3a2b51521","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::efa54c53aaa7757e9ee2c9d41b134d73","value":"figshare"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.6084/m9.figshare.8207303.v1"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282908770,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-10T12:22:40.605Z","identifier":"10.6084/m9.figshare.8207303.v1","metadataNamespace":""}},"originalId":["datacite____::016dfd6ecbfaa929bbd90ec3a2b51521"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.6084/m9.figshare.8207303.v1"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Figshare"},"relevantdate":[],"resourcetype":{"classid":"Journal contribution","classname":"Journal contribution","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Medicine"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Molecular Biology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Sociology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"19999 Mathematical Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Developmental Biology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Cancer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Additional file 1: of Womenâ s perception, attitudes, and intended behavior towards predictive epigenetic risk testing for female cancers in 5 European countries: a cross-sectional online survey"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Archaeology South East","name":"","pid":[],"rank":1,"surname":""}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:datasource","classname":"Bulktagging for Community - Datasource","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"PDF"}],"fulltext":[],"id":"50|datacite____::05c611fdfc93d7a2a703d1324e28104a","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::a48f09c562b247a9919acfe195549b47","value":"Archaeology Data Service"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"3131.64"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["http://dx.doi.org/10.5284/1003453"]}],"language":{"classid":"en","classname":"en","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282746270,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T02:13:00.271Z","identifier":"10.5284/1003453","metadataNamespace":""}},"originalId":["datacite____::05c611fdfc93d7a2a703d1324e28104a"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5284/1003453"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Archaeology Data Service"},"relevantdate":[],"resourcetype":{"classid":"Report","classname":"Report","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Grey Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An Archaeological Evaluation at 290 -294 Golder's Green Road, London Borough of Barnet NW11"}],"journal":null} +{"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'Aeres Milieu']"}],"fullname":"Veen, V. Van Der","name":"V.","pid":[],"rank":1,"surname":"Veen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Feest, NJW Van Der"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aeres Milieu"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"","dateoftransformation":"","description":[],"embargoenddate":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-07-06"},"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|datacite____::0a15c3ad876db2ffa2f8f1c9a63c3cd0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::2559b4dd27ae4bc6c7f3727ec863444f","value":"DataverseNL"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17026/dans-24w-bckq"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283098682,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-05-24T00:09:28.861Z","identifier":"10.17026/dans-24w-bckq","metadataNamespace":""}},"originalId":["datacite____::0a15c3ad876db2ffa2f8f1c9a63c3cd0"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-24w-bckq"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Aeres Milieu"},"relevantdate":[],"resourcetype":{"classid":"text","classname":"text","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"verkennend booronderzoek"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Onbekend (XXX)"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archeologisch bureau- en verkennend veldonderzoek door middel van boringen Twaalf apostelenweg te Nijmegen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"AM13379"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Daas, Martinus","name":"Martinus","pid":[],"rank":1,"surname":"Daas"},{"affiliation":[],"fullname":"Weijer, Antonius Van De","name":"Antonius","pid":[],"rank":2,"surname":"Weijer"},{"affiliation":[],"fullname":"Vos, Willem De","name":"Willem","pid":[],"rank":3,"surname":"Vos"},{"affiliation":[],"fullname":"Oost, John Van Der","name":"John","pid":[],"rank":4,"surname":"Oost"},{"affiliation":[],"fullname":"Kranenburg, Richard Van","name":"Richard","pid":[],"rank":5,"surname":"Kranenburg"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Additional file 3: Table S2. HPLC data from isolates ranked on total organic acid production and total lactic acid production on cellobiose (C6)."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::150d96a57b37c02f5d14a6ace965d790","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::efa54c53aaa7757e9ee2c9d41b134d73","value":"figshare"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.6084/m9.figshare.c.3603305_d2"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087583,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-10T09:54:38.351Z","identifier":"10.6084/m9.figshare.c.3603305_d2","metadataNamespace":""}},"originalId":["datacite____::150d96a57b37c02f5d14a6ace965d790"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.6084/m9.figshare.c.3603305_d2"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Figshare"},"relevantdate":[],"resourcetype":{"classid":"Paper","classname":"Paper","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Microbiology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Genetics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Molecular Biology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Biotechnology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"59999 Environmental Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"39999 Chemical Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Ecology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Immunology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Inorganic Chemistry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Plant Biology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"MOESM3 of Isolation of a genetically accessible thermophilic xylan degrading bacterium from compost"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Mosleh, Marwan","name":"Marwan","pid":[],"rank":1,"surname":"Mosleh"},{"affiliation":[],"fullname":"Jeesh, Yousef Al","name":"Yousef Al","pid":[],"rank":2,"surname":"Jeesh"},{"affiliation":[],"fullname":"Koustuv Dalal","name":"","pid":[],"rank":3,"surname":""},{"affiliation":[],"fullname":"Charli Eriksson","name":"","pid":[],"rank":4,"surname":""},{"affiliation":[],"fullname":"Carlerby, Heidi","name":"Heidi","pid":[],"rank":5,"surname":"Carlerby"},{"affiliation":[],"fullname":"Viitasara, Eija","name":"Eija","pid":[],"rank":6,"surname":"Viitasara"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2020-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Additional file 1. Study discussion guides."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::1e099cf76219212d554ad35b45d2345c","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2020-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::efa54c53aaa7757e9ee2c9d41b134d73","value":"figshare"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.6084/m9.figshare.12285566"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283012211,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-05-16T00:17:29.737Z","identifier":"10.6084/m9.figshare.12285566","metadataNamespace":""}},"originalId":["datacite____::1e099cf76219212d554ad35b45d2345c"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.6084/m9.figshare.12285566"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"figshare"},"relevantdate":[],"resourcetype":{"classid":"Journal contribution","classname":"Journal contribution","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Medicine"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Ecology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Sociology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"69999 Biological Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.891"},"qualifier":{"classid":"acm","classname":"acm","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Data_FILES"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Additional file 1 of Barriers to managing and delivery of care to war-injured survivors or patients with non-communicable disease: a qualitative study of Palestinian patients’ and policy-makers’ perspectives"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Kuijl, E.E.A. Van Der","name":"E. E. A.","pid":[],"rank":1,"surname":"Kuijl"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Janssens, M."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hoeven, F. Van Der"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hensen, G."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Coolen, J."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Wijnen, J."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Synthegra Archeologie"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"","dateoftransformation":"","description":[],"embargoenddate":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-11-27"},"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"25 p."}],"fulltext":[],"id":"50|datacite____::24cce4f26db20e50803102c3c7961a8e","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::2559b4dd27ae4bc6c7f3727ec863444f","value":"DataverseNL"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/publicdomain/zero/1.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17026/dans-zwz-6cvc"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282632316,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-05-24T00:19:06.371Z","identifier":"10.17026/dans-zwz-6cvc","metadataNamespace":""}},"originalId":["datacite____::24cce4f26db20e50803102c3c7961a8e"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-zwz-6cvc"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Synthegra"},"relevantdate":[],"resourcetype":{"classid":"text","classname":"text","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"PROSPECTIE"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Inventariserend veldonderzoek d.m.v. boringen, Ittervoorterweg te Swartbroek"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"SYNTHEGRA 2005 239"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"SYNTHEGRA 2005 239"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"SYNTHEGRA 2005 239"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Embree, Jennifer","name":"Jennifer","pid":[],"rank":1,"surname":"Embree"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over the last century, conflicts across the world have resulted in an unprecedented number of cultural heritage sites being purposefully targeted for destruction. While there have been several historical attempts to combat this destruction, the emerging field of digital humanities is now using new digital technologies to also document and preserve cultural heritage demolishment. This article conducts case studies of two such projects: Project Syria, a virtual reality experience documenting the Syrian Civil War, and Manar al-Athar, a digital photo archive that collects pictures of cultural heritage sites in the Middle East. This exploratory study seeks to compare past methods of preservation and documentation of cultural heritage during times of conflict to current methods of preservation and documentation through digital humanities projects, and to determine what digital humanities projects can accomplish that more traditional methods of preservation cannot."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::26243564100cd29b39382d2321372a95","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::0394b97eb11f19785cbca1ec830429da","value":"UNC Dataverse"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17615/xh7w-qv18"]}],"language":{"classid":"English","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282702184,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T05:09:49.694Z","identifier":"10.17615/xh7w-qv18","metadataNamespace":""}},"originalId":["datacite____::26243564100cd29b39382d2321372a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17615/xh7w-qv18"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The University of North Carolina at Chapel Hill University Libraries"},"relevantdate":[],"resourcetype":{"classid":"Masters Paper","classname":"Masters Paper","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Destruction of cultural property"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Digital humanities"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Syria--History--Syrian Civil War, 2011-"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archives by Any Other name: Archiving Memory During Times of Conflict through Non-Traditional Methods--A Case Study on Digital Humanities Projects Manar al-Athar and Project Syria"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Huber, Brigitte","name":"Brigitte","pid":[],"rank":1,"surname":"Huber"},{"affiliation":[],"fullname":"Barnidge, Matthew","name":"Matthew","pid":[],"rank":2,"surname":"Barnidge"},{"affiliation":[],"fullname":"Zúñiga, Homero Gil De","name":"Homero Gil","pid":[],"rank":3,"surname":"Zúñiga"},{"affiliation":[],"fullname":"Liu, James","name":"James","pid":[],"rank":4,"surname":"Liu"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Supplemental material, Supplemental_Material for Fostering public trust in science: The role of social media by Brigitte Huber, Matthew Barnidge, Homero Gil de Zúñiga and James Liu in Public Understanding of Science"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::2d1773354e6c79eee7001407cd8da2f0","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.25384/sage.9869183.v1"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282547342,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T04:44:26.733Z","identifier":"10.25384/sage.9869183.v1","metadataNamespace":""}},"originalId":["datacite____::2d1773354e6c79eee7001407cd8da2f0"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.25384/sage.9869183.v1"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"SAGE Journals"},"relevantdate":[],"resourcetype":{"classid":"Journal contribution","classname":"Journal contribution","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"200199 Communication and Media Studies not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Supplemental_Material – Supplemental material for Fostering public trust in science: The role of social media"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"Шогенцукова Залина Хасановна","name":"","pid":[],"rank":1,"surname":""},{"affiliation":[],"fullname":"Гедгафова Ирина Юрьевна","name":"","pid":[],"rank":2,"surname":""},{"affiliation":[],"fullname":"Мирзоева Жанна Мухарбиевна","name":"","pid":[],"rank":3,"surname":""},{"affiliation":[],"fullname":"Шогенцуков Али Хасанович","name":"","pid":[],"rank":4,"surname":""}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"aginfra"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Статья посвящена исследованию понятия кластера и его использования как инструмента управления повышения эффективности деятельности агропромышленным комплексом Кабардино-Балкарской Республики. Рассматриваются предпосылки и особенности кластеризации АПК как в отдельном регионе, так и в России в целом. Реализация кластерной политики в области сельского хозяйства России является инновационным подходом развития отрасли и повышения конкурентоспособности производимой продукции на рынке, повышения эффективности производственного процесса и т.д. В статье исследована модель «тройной спирали», используемой при создании и функционировании кластеров в сфере АПК. Исследование кластеров, как инструмент управления АПК отдельного региона, в частности Кабардино-Балкарской Республики, позволяет выявить факторы, обуславливающие необходимость данного процесса с одной стороны, а также выявлять резервы и иные возможности для общего развития эффективности АПК России и активации внедрения инновационных механизмов в сельское хозяйство."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Article is devoted to the study of the concept of cluster and their use as a management tool to improve the efficiency of the agro-industrial complex of the KabardinoBalkaria Republic. The prerequisites and features of agribusiness clustering both in a separate region and in Russia as a whole are considered. The implementations of the cluster policy in the field of agriculture in Russia are an innovative approach to the development of the industry and improve the competitiveness of products in the market, improve the efficiency of the production process, etc. The article investigates the model of “triple helix” used in the creation and functioning of clusters in the field of agriculture. The study of clusters as an instrument of agribusiness management in a particular region, in particular the Kabardino-Balkaria Republic, allows to identify the factors causing the need for this process on the one hand, as well as to identify reserves and other opportunities for the overall development of the efficiency of the Russian agribusiness and the activation of the introduction of innovative mechanisms in agriculture."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::2fa3de5d0846180a43214310234e5526","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.24411/2413-046x-2019-16022"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283178985,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T02:21:45.926Z","identifier":"10.24411/2413-046x-2019-16022","metadataNamespace":""}},"originalId":["datacite____::2fa3de5d0846180a43214310234e5526"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.24411/2413-046x-2019-16022"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Московский экономический журнал"},"relevantdate":[],"resourcetype":{"classid":"Paper","classname":"Paper","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"кластеры"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"урожайность в овощеводстве"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"селекция"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"современные технологии"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"«продовольственная безопасность»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"АПК"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"растениеводство"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"животноводство"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"модель «тройной спирали»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"модернизация"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"селекция."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"clusters"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"productivity in vegetable growing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modern technologies"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“food security”"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"agriculture"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"crop production"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"animal husbandry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“triple helix” model"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modernization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"clusters"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"productivity in vegetable growing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modern technologies"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“food security”"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"agriculture"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"crop production"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"animal husbandry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“triple helix” model"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modernization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Кластеры как инструмент управления агробизнесом Кабардино-Балкарской Республики"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Clusters as instrument of management of agrobusiness of Kabardino-Balkar Republic"}],"journal":null} +{"author":[{"affiliation":null,"fullname":"Schubart, A.","name":"A.","pid":[],"rank":1,"surname":"Schubart"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"n/a"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"dateofacceptance":{"dataInfo":null,"value":"1866-01-01"},"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":null,"value":"cc0"},"processingchargeamount":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2578.35"},"processingchargecurrency":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EUR"},"refereed":null,"url":["https://zenodo.org/record/1634826/files/article.pdf"]},{"accessright":null,"collectedfrom":{"dataInfo":null,"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"dateofacceptance":null,"distributionlocation":null,"hostedby":{"dataInfo":null,"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1866-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1634826"]}],"language":null,"lastupdatetimestamp":1591290751234,"oaiprovenance":null,"originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"dataInfo":null,"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1524/phil.1866.24.14.561"}],"publisher":{"dataInfo":null,"value":"Walter de Gruyter GmbH"},"relevantdate":[{"dataInfo":null,"qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-03-26T10:09:50Z"},{"dataInfo":null,"qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1866-01-01"}],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":null,"value":"Crossref"},{"dataInfo":null,"value":null}],"subject":[{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theology"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"History"},{"dataInfo":null,"qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Art history"}],"title":[{"dataInfo":null,"qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias"},{"dataInfo":null,"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias"}],"journal":{"conferencedate":null,"conferenceplace":null,"dataInfo":null,"edition":null,"ep":null,"iss":null,"issnLinking":null,"issnOnline":"2196-7008","issnPrinted":"0031-7985","name":"Philologus","sp":null,"vol":"24"}} \ No newline at end of file From 6410ab71d8bf6f936503ad760f0a00e040fae80f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 15:13:58 +0200 Subject: [PATCH 018/287] added APC in the dump and test method --- .../dhp/oa/graph/dump/ResultMapper.java | 8 +++ .../dhp/oa/graph/dump/DumpJobTest.java | 54 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index d30b3122c..1a4e7c42a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -423,6 +423,14 @@ public class ResultMapper implements Serializable { .ofNullable(i.getInstancetype()) .ifPresent(value -> instance.setType(value.getClassname())); Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + Optional> oPca = Optional.ofNullable(i.getProcessingchargeamount()); + Optional> oPcc = Optional.ofNullable(i.getProcessingchargecurrency()); + if (oPca.isPresent() && oPcc.isPresent()) { + APC apc = new APC(); + apc.setCurrency(oPcc.get().getValue()); + apc.setAmount(oPca.get().getValue()); + instance.setArticleprocessingcharge(apc); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 7c69c9635..f0842c844 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -13,6 +13,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; import org.slf4j.Logger; @@ -408,4 +409,57 @@ public class DumpJobTest { } + @Test + public void testArticlePCA() { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run( + // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + + Assertions.assertEquals(23, verificationDataset.count()); + //verificationDataset.show(false); + + Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count()); + + verificationDataset.createOrReplaceTempView("check"); + + org.apache.spark.sql.Dataset temp = spark.sql("select id " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); + + Assertions.assertTrue(temp.count() == 2); + + Assertions.assertTrue(temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1); + + Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); + + + +// verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset +// .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") +// .count() + +//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) + } + } From 13c66e16be2ce8503ffe48c002adee17637ab2e5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 15:15:27 +0200 Subject: [PATCH 019/287] changed logic to split for communities --- .../graph/dump/community/CommunitySplit.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 088ca1eaa..ec16a65e0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -57,13 +57,11 @@ public class CommunitySplit implements Serializable { Dataset community_products = result .filter((FilterFunction) r -> containsCommunity(r, c)); - - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); - + community_products + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + c); } @@ -72,9 +70,9 @@ public class CommunitySplit implements Serializable { return r .getContext() .stream() - .map(con -> con.getCode()) - .collect(Collectors.toList()) - .contains(c); + .map(con -> con.getCode()) + .collect(Collectors.toList()) + .contains(c); } return false; } From 9a58f1b93ddc0b68415452772cf7f5f167fa0c46 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 15:20:39 +0200 Subject: [PATCH 020/287] added logic to select only the valid relations: those not deletedbyinference and having both part of the relation as entities in the graph --- .../SparkSelectValidRelationsJob.java | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java new file mode 100644 index 000000000..fdd545ff7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -0,0 +1,127 @@ +package eu.dnetlib.dhp.oa.graph.dump.complete; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.schema.oaf.*; + +public class SparkSelectValidRelationsJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkSelectValidRelationsJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + selectValidRelation(spark, inputPath, outputPath); + + }); + + } + + private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) { + Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class); + Dataset publication = Utils.readPath(spark, inputPath + "/publication", Publication.class); + Dataset dataset = Utils + .readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class); + Dataset software = Utils.readPath(spark, inputPath + "/software", Software.class); + Dataset other = Utils + .readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class); + Dataset organization = Utils.readPath(spark, inputPath + "/organization", Organization.class); + Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); + Dataset datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class); + + relation.createOrReplaceTempView("relation"); + publication.createOrReplaceTempView("publication"); + dataset.createOrReplaceTempView("dataset"); + other.createOrReplaceTempView("other"); + software.createOrReplaceTempView("software"); + organization.createOrReplaceTempView("organization"); + project.createOrReplaceTempView("project"); + datasource.createOrReplaceTempView("datasource"); + + spark + .sql( + "SELECT id " + + "FROM publication " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM dataset " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM other " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM software " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM organization " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM project " + + "UNION ALL " + + "SELECT id " + + "FROM datasource ") + .createOrReplaceTempView("identifiers"); + + spark + .sql( + "SELECT relation.* " + + "FROM relation " + + "JOIN identifiers i1 " + + "ON source = i1.id " + + "JOIN identifiers i2 " + + "ON target = i2.id " + + "WHERE datainfo.deletedbyinference = false") + .as(Encoders.bean(Relation.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); + ; + + } +} \ No newline at end of file From 39b1a6edf6fae8c2be7d0f5a604e9f05135fba55 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 15:23:09 +0200 Subject: [PATCH 021/287] added test class for the selection of valid relations and description --- .../SparkSelectValidRelationsJob.java | 8 ++ .../dump/complete/SelectRelationTest.java | 99 +++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index fdd545ff7..8b477b34d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -18,6 +18,14 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.oaf.*; +/** + * It selects the valid relations among those present in the graph. One relation is valid if it is not deletedbyinference + * and if both the source and the target node are present in the graph and are not deleted by inference nor invisible. + * To check this I made a view of the ids of all the entities in the graph, and select the relations for which a join exists + * with this view for both the source and the target + */ + + public class SparkSelectValidRelationsJob implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java new file mode 100644 index 000000000..248bd7256 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java @@ -0,0 +1,99 @@ +package eu.dnetlib.dhp.oa.graph.dump.complete; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Relation; +import net.sf.saxon.expr.instruct.ForEach; + +public class SelectRelationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(SelectRelationTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(SelectRelationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(SelectRelationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SelectRelationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void test1() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations") + .getPath(); + + SparkSelectValidRelationsJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath + }); + +// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.oaf.Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.oaf.Relation.class)); + + Assertions.assertTrue(verificationDataset.count() == 7); + + } + +} \ No newline at end of file From 8429aed6c61294f00c3a874f6b9c90c4688189df Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 15:49:38 +0200 Subject: [PATCH 022/287] Added resource for testing selection of valid relations --- .../dhp/oa/graph/dump/selectrelations/dataset | 10 ++++++++++ .../oa/graph/dump/selectrelations/datasource | 7 +++++++ .../graph/dump/selectrelations/organization | 19 +++++++++++++++++++ .../dump/selectrelations/otherresearchproduct | 3 +++ .../dhp/oa/graph/dump/selectrelations/project | 12 ++++++++++++ .../oa/graph/dump/selectrelations/publication | 9 +++++++++ .../oa/graph/dump/selectrelations/relation | 15 +++++++++++++++ .../oa/graph/dump/selectrelations/software | 6 ++++++ 8 files changed, 81 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/dataset create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/datasource create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/organization create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/otherresearchproduct create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/project create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/publication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/relation create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/software diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/dataset b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/dataset new file mode 100644 index 000000000..2b62b5f0d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/dataset @@ -0,0 +1,10 @@ +{"author":[{"affiliation":[],"fullname":"Kooi, M.","name":"M.","pid":[],"rank":1,"surname":"Kooi"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:25:01.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"APP"}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doajarticles::fd4c399077127f0ba09b5205e2b78406","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17026/dans-zcb-g65a"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591283023064,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-20T00:36:01Z","harvestDate":"2020-05-25T11:55:15.284Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:117400","metadataNamespace":""}},"originalId":["DansKnawCris::1a960e20087cb46b93588e4e184e8a58"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-zcb-g65a"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-v3-nws7"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.891"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"mental disorders"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sportlaan"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Beckers, I.S.J.","name":"I.S.J.","pid":[],"rank":1,"surname":"Beckers"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:30:12.903Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In april 2016 is een archeologisch vooronderzoek uitgevoerd in een gepland leidingtracé langs de Schaapsdrift in De Bilt (gemeente De Bilt). De aanleiding voor het onderzoek vormt de aanleg van een nieuwe waterleiding en een beluchtingstoren op het pompstation Beerschoten. In het kader van deze ontwikkeling dient een omgevingsvergunning te worden aangevraagd. Het plangebied bestaat uit het deel van het leidingtracé waar op basis van het vigerend bestemmingsplan een dubbelbestemming ‘Waarde Archeologie’ geldt. Op basis van het bureauonderzoek werd in het plangebied een podzolgrond in de top van het dekzand verwacht, waar bovenop mogelijk een circa één-meter dikke laag stuifzand ligt. Het archeologisch niveau bevindt zich hierbij direct onder de bouwvoor c.q. humeuze bovengrond of in de top van een al dan niet begraven podzolbodem. De resten, die hierin te verwachten zijn, dateren naar verwachting in het Laat-Paleolithicum tot en met de Middeleeuwen en betreffen sporen van nederzetting en/of landgebruik uit die periode. De verwachting hierop is middelhoog. Voor wat betreft resten uit de Nieuwe tijd is de verwachting laag. In de Nieuwe tijd is het plangebied waarschijnlijk altijd heidegebied geweest. Ondanks dit verwachtingspatroon zijn in het plangebied wel diepe verstoringen te verwachten als gevolg van de aanleg van het waterpompstation. Op basis van de resultaten van het veldonderzoek blijkt dat het plangebied diep verstoord is geraakt tot tenminste 85 cm –Mv. In de meeste gevallen reikt de verstoring zelfs dieper dan 1,0 m –Mv. Er zijn tevens in het plangebied geen sporen gevonden van stuifzand of resten van oorspronkelijke bodemvorming. Ook archeologische indicatoren ontbreken. De mate van verstoring en het ontbreken van indicatoren leiden ertoe dat de archeologische verwachting voor alle archeologische perioden naar laag is bij te stellen. Er zijn namelijk geen (intacte) resten meer te verwachten."}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17026/dans-zqb-kvsa"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591282863412,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-20T01:02:32Z","harvestDate":"2020-05-25T11:59:33.676Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:128887","metadataNamespace":""}},"originalId":["DansKnawCris::454801f4bde7f3da9bf519c3ced15f64"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-zqb-kvsa"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-n8-lg50"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-zqb-kvsa"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-n8-lg50"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"De Bilt, Schaapsdrift (ong.) Gemeente De Bilt"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Gunawan, A.","name":"A.","pid":[],"rank":1,"surname":"Gunawan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:33:00.745Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"N/A"}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|openaire____::8f991165fae922e29ad55d592f568464","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://www.narcis.nl/publication/RecordID/hdl%3A10411%2F10268"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591282599634,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-03-09T13:04:52Z","harvestDate":"2020-05-25T11:21:31.285Z","identifier":"oai:services.nod.dans.knaw.nl:Products/uvtdataverse2:hdl:10411/10268","metadataNamespace":""}},"originalId":["DansKnawCris::64e5f4fdca8a51b9de769f61b304a12c"],"pid":[],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Humanities"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Tilburg University"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Information access for SME's in Indonesia: A study on the business performance of garment manufacturers [Dataset]"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Winslow, Sean M.","name":"Sean M.","pid":[],"rank":1,"surname":"Winslow"},{"affiliation":[],"fullname":"Schneider, Gerlinde","name":"Gerlinde","pid":[],"rank":2,"surname":"Schneider"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:37:36.67Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Abstract and poster of paper 0555 presented at the Digital Humanities Conference 2019 (DH2019), Utrecht , the Netherlands 9-12 July, 2019."}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|openaire____::ec653e804967133b9436fdd30d3ff51d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://www.narcis.nl/publication/RecordID/hdl%3A10411%2FE7PZSQ"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591282617163,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-03-16T13:03:07Z","harvestDate":"2020-05-25T11:21:34.541Z","identifier":"oai:services.nod.dans.knaw.nl:Products/uudataverse:hdl:10411/E7PZSQ","metadataNamespace":""}},"originalId":["DansKnawCris::a3378d960091a2f6c04ccd13410fb9b3"],"pid":[],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Arts and Humanities"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Madgwas: a Database of Ethiopian Binding Decoration"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Boer, E. de","name":"E. de","pid":[],"rank":1,"surname":"Boer"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:39:48.465Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"BOK"}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|opendoar____::15231a7ce4ba789d13b722cc5c955834","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17026/dans-xs6-d4nr"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591282882937,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-20T02:08:01Z","harvestDate":"2020-05-25T12:07:27.863Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:117484","metadataNamespace":""}},"originalId":["DansKnawCris::c49ecb710a817a28cec40cb0acaca444"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xs6-d4nr"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-3i-17yr"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-xs6-d4nr"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-3i-17yr"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Groenstraat 25"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Lil, R. van","name":"R. van","pid":[],"rank":1,"surname":"Lil"},{"affiliation":[],"fullname":"Brenk, S. van den","name":"S. van den","pid":[],"rank":2,"surname":"Brenk"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:41:06.532Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In opdracht van Aquifer Advies B.V. heeft Periplus Archeomare B.V. in samenwerking met Geotron Drilling Company B.V. een archeologisch inventariserend veldonderzoek door middel van boringen uitgevoerd voor het plangebied vaarweg Molenrak in het IJsselmeer. Het onderzoek richt zich op de archeologische niveaus die worden verwacht binnen van het dekzand- en rivierduinenlandschap en het holocene getijdenlandschap. Doel van het booronderzoek was: a) het vaststellen van de aard, morfologie en intactheid van het door jongere sedimenten afgedekte dekzand- en rivierduinenlandschap en het onderzoeken van de mogelijkheden die dit landschap heeft geboden voor bewoning, en b) ter plaatse van de magnetische anomalieën vaststellen van: * de aard van de bemonsterde sedimenten, * het milieu waarin deze sedimenten zijn afgezet, * fenomenen die wijzen op bodemvorming, rijping en bioturbatie, * de aanwezigheid van een cultuurlaag, en * het vaststellen of de aangetroffen afzettingen correleren met het patroon van magnetische anomalieën. Om aan de doelstelling te beantwoorden zijn twintig boringen uitgevoerd, waarvan tien gericht op het dekzand-/rivierduinlandschap en tien gericht op het getijdenlandschap. Op basis van de grootschalige natuurlijke en antropogene verstoringen die in het gebied hebben plaatsgevonden en het versnipperde voorkomen van een intacte dekzandlandschap wordt geadviseerd om het plangebied vrij te geven voor de geplande zandwinning. Tijdens de geplande werkzaamheden kunnen nog resten aan het licht komen die tot heden volledig werden afgedekt in de waterbodem of niet als archeologisch object zijn herkend tijdens het geofysisch onderzoek. De uitvoerder is conform de Erfgoedwet (2016) verplicht om dergelijke vondsten te melden bij de bevoegde overheid. Deze meldingsplicht dient in het bestek of Plan van Aanpak van het werk te worden opgenomen."}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|dedup_wf_001::1ea4bcb1bae8c6befef1e7f1230f0f10","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.17026/dans-znd-7mjh"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591282551996,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-20T00:41:20Z","harvestDate":"2020-05-25T11:56:11.592Z","identifier":"oai:services.nod.dans.knaw.nl:Products/dans:oai:easy.dans.knaw.nl:easy-dataset:120749","metadataNamespace":""}},"originalId":["DansKnawCris::d6d0a2f52afdf01dfbc8daccc7237d17"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-znd-7mjh"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-m0-v356"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17026/dans-znd-7mjh"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:ui:13-m0-v356"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Vaarweg Molenrak, IJsselmeer. Inventariserend veldonderzoek door middel van Aqyalock boringen"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Huijnen, Pim","name":"Pim","pid":[],"rank":1,"surname":"Huijnen"},{"affiliation":[],"fullname":"Wevers, Melvin","name":"Melvin","pid":[],"rank":2,"surname":"Wevers"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T12:41:13.79Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Abstract and poster of paper 0575 presented at the Digital Humanities Conference 2019 (DH2019), Utrecht , the Netherlands 9-12 July, 2019."}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|DansKnawCris::da4eb1e96712bb8f2c51e2eb680118ed","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://www.narcis.nl/publication/RecordID/hdl%3A10411%2FPLGBWF"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591282581458,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-03-16T13:03:18Z","harvestDate":"2020-05-25T11:21:36.804Z","identifier":"oai:services.nod.dans.knaw.nl:Products/uudataverse:hdl:10411/PLGBWF","metadataNamespace":""}},"originalId":["DansKnawCris::da4eb1e96712bb8f2c51e2eb680118ed"],"pid":[],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0021","classname":"0021","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Arts and Humanities"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Buy Healthy, Tasty, Pure! A Digital Text Analysis of Neoliberal Trends in Dutch Food Culture"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Klackl, Johannes","name":"Johannes","pid":[],"rank":1,"surname":"Klackl"},{"affiliation":[],"fullname":"Jonas, Eva","name":"Eva","pid":[],"rank":2,"surname":"Jonas"},{"affiliation":[],"fullname":"Fritsche, Immo","name":"Immo","pid":[],"rank":3,"surname":"Fritsche"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:datasource","classname":"Bulktagging for Community - Datasource","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-08"},"dateofcollection":"","dateoftransformation":"2020-05-29T13:26:45.945Z","description":[],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|NeuroVault__::0a1bf96b7165e962e90cb14648c9462d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-08"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"},"instancetype":{"classid":"0025","classname":"Image","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/publicdomain/zero/1.0/"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://neurovault.org/api/collections/1455/"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591283207779,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fneurovault.org%2Fapi%2Fcollections%2F","datestamp":"","harvestDate":"2020-05-29T13:25:40.951Z","identifier":"","metadataNamespace":""}},"originalId":["NeuroVault__::0a1bf96b7165e962e90cb14648c9462d"],"pid":[],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Brain"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Neural evidence that the behavioral inhibition system is involved in existential threat processing"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Clare Kelly, A. M.","name":"A. M.","pid":[],"rank":1,"surname":"Clare Kelly"},{"affiliation":[],"fullname":"Uddin, Lucina Q.","name":"Lucina Q.","pid":[],"rank":2,"surname":"Uddin"},{"affiliation":[],"fullname":"Biswal, Bharat B.","name":"Bharat B.","pid":[],"rank":3,"surname":"Biswal"},{"affiliation":[],"fullname":"Xavier Castellanos, F.","name":"F.","pid":[],"rank":4,"surname":"Xavier Castellanos"},{"affiliation":[],"fullname":"Milham, Michael P.","name":"Michael P.","pid":[],"rank":5,"surname":"Milham"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:datasource","classname":"Bulktagging for Community - Datasource","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-04"},"dateofcollection":"","dateoftransformation":"2020-05-29T13:26:49.231Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"OpenfMRI ds000102"}],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|NeuroVault__::6c8349cc7260ae62e3b1396831a8398f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-04"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"},"instancetype":{"classid":"0025","classname":"Image","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/publicdomain/zero/1.0/"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://neurovault.org/api/collections/45/"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591283209523,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fneurovault.org%2Fapi%2Fcollections%2F","datestamp":"","harvestDate":"2020-05-29T13:24:17.147Z","identifier":"","metadataNamespace":""}},"originalId":["NeuroVault__::6c8349cc7260ae62e3b1396831a8398f"],"pid":[],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Brain"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Competition between functional brain networks mediates behavioral variability"}],"version":null} +{"author":[{"affiliation":[],"fullname":"Shine, James M.","name":"James M.","pid":[],"rank":1,"surname":"Shine"},{"affiliation":[],"fullname":"Matar, Elie","name":"Elie","pid":[],"rank":2,"surname":"Matar"},{"affiliation":[],"fullname":"Ward, Philip B.","name":"Philip B.","pid":[],"rank":3,"surname":"Ward"},{"affiliation":[],"fullname":"Bolitho, Samuel J.","name":"Samuel J.","pid":[],"rank":4,"surname":"Bolitho"},{"affiliation":[],"fullname":"Pearson, Mark","name":"Mark","pid":[],"rank":5,"surname":"Pearson"},{"affiliation":[],"fullname":"Naismith, Sharon L.","name":"Sharon L.","pid":[],"rank":6,"surname":"Naismith"},{"affiliation":[],"fullname":"Lewis, Simon J. G.","name":"Simon J. G.","pid":[],"rank":7,"surname":"Lewis"},{"affiliation":[],"fullname":"Chen, Robert","name":"Robert","pid":[],"rank":8,"surname":"Chen"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null},{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:datasource","classname":"Bulktagging for Community - Datasource","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-03-05"},"dateofcollection":"","dateoftransformation":"2020-05-29T13:26:53.052Z","description":[],"device":null,"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[],"id":"50|NeuroVault__::e0c641195b27425bb056ac56f8953d24","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-03-05"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|rest________::fb1a3d4523c95e63496e3bc7ba36244b","value":"NeuroVault"},"instancetype":{"classid":"0025","classname":"Image","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/publicdomain/zero/1.0/"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://neurovault.org/api/collections/421/"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastmetadataupdate":null,"lastupdatetimestamp":1591283211447,"metadataversionnumber":null,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fneurovault.org%2Fapi%2Fcollections%2F","datestamp":"","harvestDate":"2020-05-29T13:24:25.02Z","identifier":"","metadataNamespace":""}},"originalId":["NeuroVault__::e0c641195b27425bb056ac56f8953d24"],"pid":[],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"size":null,"source":[],"storagedate":null,"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Brain"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Differential Neural Activation Patterns in Patients with Parkinsons Disease and Freezing of Gait in Response to Concurrent Cognitive and Motor Load"}],"version":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/datasource b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/datasource new file mode 100644 index 000000000..95b1b2279 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/datasource @@ -0,0 +1,7 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::2a35952e87adba5e5cb0ccdbcdeb33b7","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::877ff1fe2ace265278c02dd271312166","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::14adc1a0346e9fa007fa404ce5524e2b","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2017-11-14","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"extraInfo":[],"id":"10|doajarticles::8340c389da4eb06220bc96e950bdd3ed","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1476-0711","name":"Annals of Clinical Microbiology and Antimicrobials"},"lastupdatetimestamp":1616171543519,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj14760711"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Annals of Clinical Microbiology and Antimicrobials"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1476-0711"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Therapeutics. Pharmacology | Medicine: Internal medicine: Infectious and parasitic diseases | Science: Microbiology"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://ann-clinmicrob.biomedcentral.com"}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/organization b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/organization new file mode 100644 index 000000000..295bca986 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/organization @@ -0,0 +1,19 @@ +{"alternativeNames":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"한국초등도덕교육학회"}],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"KR","classname":"Korea (Republic of)","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","lastupdatetimestamp":1566902414749,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korean Elementary Moral Education Society"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korean Elementary Moral Education Society"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.496778.3"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.ethics.or.kr/"}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33","value":"NIH - National Institutes of Health"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2016-07-12","dateoftransformation":"2018-09-13","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"id":"20|nih_________::ffcd387c4ca1e9f9b60a398123e45904","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"CROSSPATHS MANAGEMENT SYSTEMS, INC."},"originalId":["nih_________::CROSSPATHS_MANAGEMENT_SYSTEMS__INC."],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"CA","classname":"Canada","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413","lastupdatetimestamp":1566902407153,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"AHS - Stollery Children's Hospital"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Stollery Children's Hospital"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.416656.6"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"https://www.stollerykids.com/"}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"CZ","classname":"Czech Republic","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"extraInfo":[],"id":"20|corda__h2020::56fd6f1eda222f51050b1ad488e1362a","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"INSTITUT MIKROELEKTRONICKYCH APLIKACI S.R.O."},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"IMA"},"originalId":["corda__h2020::999697424"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"http://www.ima.cz"}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"MK","classname":"Former Yugoslav Republic of Macedonia","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2020-03-31","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"extraInfo":[],"id":"20|corda__h2020::45032111512cb108a1c3518ec100848c","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"MARSECO DOO TETOVO"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"ORGANIZATION FORSERVICES AND INFORMATION TECHNOLOGIES MARSECO LTD TETOVO"},"originalId":["corda__h2020::901028345"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"http://www.marseco.mk"}} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"US","classname":"United States","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::c091a5e74444b017cf897f35a56afd7f","lastupdatetimestamp":1566902407729,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"SatCon Technology Corporation (United States)"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"SatCon Technology Corporation (United States)"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.421895.3"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.satcon.com/en/home"}} +{"alternativeNames":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"SHoF"}],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"SE","classname":"Sweden","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::87698402476531ba39e61f1df38f2a91","lastupdatetimestamp":1566902410217,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Swedish House of Finance"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"SHoF"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.451954.8"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://houseoffinance.se/"}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"country":{"classid":"FI","classname":"Finland","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-26","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"true"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"extraInfo":[],"id":"20|corda_______::edaaa4338d23d42b8c5215c78daf86ea","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"KEMIRA OYJ"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"KEMIRA"},"originalId":["corda_______::974730303"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"http://www.kemira.com"}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"BE","classname":"Belgium","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"extraInfo":[],"id":"20|corda__h2020::d77ade529a736b7f62a0f5d09bc7935c","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"HUAWEI TECHNOLOGIES RESEARCH & DEVELOPMENT BELGIUM"},"originalId":["corda__h2020::955274431"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"http://www.caliopa.com"}} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"US","classname":"United States","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::fd5cb603199e938ab3aa36dfab18c0fa","lastupdatetimestamp":1566902407655,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Mental Health Association of Southeastern Pennsylvania"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Mental Health Association of Southeastern Pennsylvania"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.421263.0"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33","value":"NIH - National Institutes of Health"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"dateofcollection":"2016-07-12","dateoftransformation":"2018-09-13","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"false"},"extraInfo":[],"id":"20|nih_________::d9dbe8cc6dd2cb024635a760622f206e","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000000000000044"},"value":"UNIVERSITY OF SOUTH CAROLINA"},"originalId":["nih_________::UNIVERSITY_OF_SOUTH_CAROLINA"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"CZ","classname":"Czech Republic","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.89"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::e0152e043053919e7405426aa02b267d","lastupdatetimestamp":1566902409848,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"ASCR - OI"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"OI"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.447975.c"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.orient.cas.cz/"}} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"EG","classname":"Egypt","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::967f9e18aa4d31546f54b57af067a2f2","lastupdatetimestamp":1566902409332,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Academy of Arts"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Academy of Arts"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.442752.2"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""}} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"GB","classname":"United Kingdom","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.89"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::0d7fa494d48bc2b6a872e7d6dcd79d54","lastupdatetimestamp":1566902409075,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Cymdeithas Addysg y Gweithwyr"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"WEA"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.439360.b"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.wea.org.uk/"}} +{"alternativeNames":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"NHC"}],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"US","classname":"United States","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","lastupdatetimestamp":1566902413769,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"National Health Council"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"NHC"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.487707.b"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.nationalhealthcouncil.org/"}} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"ES","classname":"Spain","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.89"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::96570a4fef2d9a453b2b5a8a1241eb2e","lastupdatetimestamp":1566902406171,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"MINECO - CDTI"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"CDTI"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.410460.7"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"https://www.cdti.es/index.asp?idioma=2"}} +{"alternativeNames":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"帯広厚生病院"}],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"JP","classname":"Japan","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","lastupdatetimestamp":1566902407156,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Obihiro Kosei General Hospital"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Obihiro Kosei General Hospital"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.416691.d"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.dou-kouseiren.com/byouin/obihiro/"}} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"GB","classname":"United Kingdom","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","lastupdatetimestamp":1566902408827,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"MedPharm (United Kingdom)"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"MedPharm (United Kingdom)"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.436062.5"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""}} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"country":{"classid":"DE","classname":"Germany","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-26","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"true"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"true"},"extraInfo":[],"id":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"NMTC (NANO & MICRO TECHNOLOGYCONSULTING) - DR MATTHIAS WERNER"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"NMTC"},"originalId":["corda_______::998806188"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800000011920928955"},"value":"http://www.nmtc.de"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/otherresearchproduct b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/otherresearchproduct new file mode 100644 index 000000000..c68834d2a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/otherresearchproduct @@ -0,0 +1,3 @@ +{"author":[{"affiliation":[],"fullname":"(:Unkn) Unknown","name":"","pid":[],"rank":1,"surname":""}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"contactgroup":[],"contactperson":[],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Technische Universität Berlin"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Technische Universität Berlin"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Leibinger, Regine"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Bonauer, Markus"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kneer, Florian"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Strugar, Bogdan"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In den Workshops zum „digitalen Handwerk“ wurden unter dem Motto „Inspiration statt Imitation“ tektonische Modelle anhand von ausgewählten Vorbildern aus der Natur entwickelt. Ziel des Seminars war das Verständnis komplexer räumlicher Strukturen und die präzise Fertigung entsprechender digitaler und physischer Artefakte mit konstruktiven Methoden des rechnerbasierten Modellbaus."}],"embargoenddate":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-06-18"},"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::23f1352bc917eaee7a6f714a104af457","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.14279/depositonce-1958"]}],"language":{"classid":"de","classname":"de","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283033042,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T01:45:04.147Z","identifier":"10.14279/depositonce-1958","metadataNamespace":""}},"originalId":["datacite____::23f1352bc917eaee7a6f714a104af457"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.14279/depositonce-1958"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Fachgebiet Baukonstruktion und Entwerfen der Techn. Univ. Berlin"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"720 Architektur"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Architektur"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Berlin"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Tierpark Berlin"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Architecture"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Berlin"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Zoo"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Natur und Artefakt"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tierpark Berlin"}],"tool":[]} +{"author":[{"affiliation":[],"fullname":"Pilavaki, Andrea","name":"Andrea","pid":[],"rank":1,"surname":"Pilavaki"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"contactgroup":[],"contactperson":[],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"National Technological University Of Athens"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"National Technological University Of Athens"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"","dateoftransformation":"","description":[],"embargoenddate":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-03-23"},"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::3104dd40f9f245a7ccec3a7eedec0677","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-nd/3.0/gr"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.26240/heal.ntua.1869"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282610325,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T00:09:57.989Z","identifier":"10.26240/heal.ntua.1869","metadataNamespace":""}},"originalId":["datacite____::3104dd40f9f245a7ccec3a7eedec0677"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.26240/heal.ntua.1869"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"National Technological University of Athens"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Περπάτημα"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Walking"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Περιπλάνηση"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Εγκέφαλος"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Σκέψη"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Στοχασμός"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Χωρική αντίληψη"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Wandering"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Brain"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Thinking"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Contemplation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Spatial perception"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Περπατώντας: χωρική εμπειρία και αντίληψη εν κινήσει"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Walking: spatial experience through the moving body"}],"tool":[]} +{"author":[{"affiliation":[],"fullname":"British Market Research Bureau","name":"","pid":[],"rank":1,"surname":""}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"contactgroup":[],"contactperson":[],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:datasource","classname":"Bulktagging for Community - Datasource","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1982-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"These are a series of surveys containing questions asked of a monthly representative sample of 1000 adults about their financial well-being and expectations by the British Market Research Bureau."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::375aca0856010caaae89abe57f442c31","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1982-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81","value":"UK Data Archive"},"instancetype":{"classid":"0020","classname":"Other ORP type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5255/ukda-sn-1698-1"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283267206,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T08:47:43.515Z","identifier":"10.5255/ukda-sn-1698-1","metadataNamespace":""}},"originalId":["datacite____::375aca0856010caaae89abe57f442c31"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5255/ukda-sn-1698-1"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"UK Data Service"},"relevantdate":[],"resourcetype":{"classid":"[data collection]","classname":"[data collection]","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.891"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"health care economics and organizations"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Financial Expectations, 1970-1981"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Financial Expectations, November 1978"}],"tool":[{"value": "fake", "dataInfo": null}]} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/project b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/project new file mode 100644 index 000000000..1aff5b903 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/project @@ -0,0 +1,12 @@ +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Fotoniikka ja modernit kuvantamismenetelmät LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"135027"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|opendoar____::041abd8c990fc531ab9bd2674a0e2725","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"190,000 €"},"originalId":["aka_________::135027"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Academy Project Funding TT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"316061"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"450,000 €"},"originalId":["aka_________::316061"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population."},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MOISE"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-17-CE05-0033"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|dedup_wf_001::04e2c34ef4daa411ff2497afc807b612","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-17-CE05-0033"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS"},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"GALAXY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-09-SEGI-0005"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|aka_________::1bc716a1763110da3eb1af867de718a8","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-09-SEGI-0005"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES"},"totalcost":0.0} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0347462"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|aka_________::a6c805bcfd383bae043d8df38e79db78","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0347462"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Femtosecond laser micromachining facility"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0347462"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LP140100567"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2017-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Projects\n Linkage Projects\n Linkage Projects\n \n arc:fundingStream\n \n "}],"id":"40|dedup_wf_001::02859c30f6c8bfbdd8c427068a6ec684","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LP140100567"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Linkage Projects - Grant ID: LP140100567"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LP140100567"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DP180101235"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-04-02","dateoftransformation":"2019-09-02","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2023-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Discovery Projects\n Discovery Projects\n Discovery Projects\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::DP180101235"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2018-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Discovery Projects - Grant ID: DP180101235"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/DP180101235"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0989831"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0989831"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0989831"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"3120023"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"IBACACHE ROJAS, JUANA"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-28"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::POSTDOCTORADOFondecyt stream, POSTDOCTORADOFondecyt stream, POSTDOCTORADOconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::3120023"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2011-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/183109"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1040240"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ROMERO FIGUEROA, JULIO"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2007-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1040240"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2004-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/163340"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1020683"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MANRIQUEZ CASTRO, VICTOR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2006-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1020683"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2002-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/162452"}} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANIM"},"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"FP7-PEOPLE-2013-IIF"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"628405"},"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"M.D.Davies@bristol.ac.uk"},"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Davies, Maria"},"contactphone":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"+44 117 3317352"},"contracttype":{"classid":"MC","classname":"Support for training and career development of researchers (Marie Curie)","schemeid":"ec:FP7contractTypes","schemename":"ec:FP7contractTypes"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-26","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2016-04-30"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ec__________::ECECEuropean CommissionEUec__________::EC::FP7::SP3::PEOPLEMarie-Curie ActionsPEOPLEec:programec__________::EC::FP7::SP3SP3-PeopleSP3ec:specificprogramec__________::EC::FP7SEVENTH FRAMEWORK PROGRAMMEFP7ec:frameworkprogram"}],"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MC-IIF"},"optional2":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"https://ec.europa.eu/research/participants/portal/page/call_FP7#wlp_call_FP7"},"originalId":["corda_______::628405"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-05-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers"},"totalcost":0.0} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/publication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/publication new file mode 100644 index 000000000..469c78e56 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/publication @@ -0,0 +1,9 @@ +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T16:14:18.452Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg., bijl."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doajarticles::b566fa319c3923454e1e8eb886ab62d2","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2007-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282676557,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:51:24Z","harvestDate":"2020-05-25T11:33:13.427Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013110","metadataNamespace":""}},"originalId":["DansKnawCris::0224aae28af558f21768dbc6439c7a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013110"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Inventariserend veldonderzoek d.m.v. boringen (karterende fase) : Raadhuisstraat te Dirkshorn, gemeente Harenkarspel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Synthegra Archeologie Rapportenreeks P0502381"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:03:57.761Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283087415,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:58:39Z","harvestDate":"2020-05-25T11:34:38.707Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800020324","metadataNamespace":""}},"originalId":["DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800020324"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:13:23.976Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doajarticles::fbf7592ddbf2ad3cc0ed70c0f2e1d67c","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282897527,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:42:33Z","harvestDate":"2020-05-25T11:40:10.845Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550053196","metadataNamespace":""}},"originalId":["DansKnawCris::26780065282e607306372abd0d808245"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550053196"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[{"affiliation":[],"fullname":"van Someren, Christian","name":"Christian","pid":[],"rank":1,"surname":"van Someren"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:32.27Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over het Energieakkoord. In het energieakkoord voor duurzame groei is afgesproken dat in 2020 14 procent van de opwek hernieuwbaar moet zijn en in 2023 16 procent. De doelstelling is een uitdagende opgave waarbij de eerste vraag is: \"Hoeveel hernieuwbare energie wordt er op dit moment opgewekt in Nederland?\" Deze website geeft antwoord op de vraag voor de actueel opgewekte windenergie, zonne-energie en biogas."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2016-11-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://energieopwek.nl/"]}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282663379,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2020-05-08T17:46:55Z","harvestDate":"2020-05-25T15:30:24.079Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/hbo:oai:hbokennisbank.nl:hanzepure:oai:research.hanze.nl:publications/813a5dfa-4fd0-44c4-8cbf-310324dc724d","metadataNamespace":""}},"originalId":["DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"urn:nbn:nl:hs:18-813a5dfa-4fd0-44c4-8cbf-310324dc724d"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0038","classname":"0038","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"energieproductie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Management"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Monitoring"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Policy and Law"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Energie interventies en gedrag"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"publieke ondersteuning en communicatie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Professional practice & society"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Energie opwek"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T17:55:40.568Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282670417,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:41:34Z","harvestDate":"2020-05-25T11:40:05.974Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550052278","metadataNamespace":""}},"originalId":["DansKnawCris::3c9f068ddc930360bec6925488a9a97f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550052278"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:11:57.737Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met lit. opg"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4669a378a73661417182c208e6fdab53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282758835,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:36:00Z","harvestDate":"2020-05-25T11:30:47.199Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800007467","metadataNamespace":""}},"originalId":["DansKnawCris::4669a378a73661417182c208e6fdab53"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800007467"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Vlaardingen"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Plangebied Het Hof en Oranjepark : gemeente Vlaardingen : archeologisch vooronderzoek: een inventariserend veldonderzoek (verkennende fase)"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"RAAP-rapport, ISSN 0925-6229 2089"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:18:18.049Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Met literatuuropgave"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283010899,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:48:12Z","harvestDate":"2020-05-25T11:32:46.363Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce-kb:document:800014509","metadataNamespace":""}},"originalId":["DansKnawCris::4a9152e80f860eab99072e921d74a0ff"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceFullCatalogue&search=priref=800014509"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proefsleuvenonderzoek aan de Bredasebaan 8 te Bladel"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archeodienst rapport, ISSN 1877-2900 565"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"dateofcollection":"","dateoftransformation":"2020-05-25T18:31:28.838Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Lit.opg."}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::52c4541c9bffde34daa945ece8dcf635","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"nl","classname":"nl","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283000091,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T07:52:57Z","harvestDate":"2020-05-25T11:33:31.012Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550013915","metadataNamespace":""}},"originalId":["DansKnawCris::52c4541c9bffde34daa945ece8dcf635"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013915"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550013915"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"prospectie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Honselersdijk tracé persleiding (gemeente Westland) : een bureauonderzoek en inventariserend veldonderzoek in de vorm van een verkennend en karterend booronderzoek"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"ADC-rapport 995"}],"journal":null} +{"author":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":null,"dateofcollection":"","dateoftransformation":"2020-05-25T18:32:32.283Z","description":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|DansKnawCris::53842d77ea8c021a3ad5b401a8c7458b","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"dateofacceptance":null,"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::c6df70599aa984f16ee52b4b86d2e89f","value":"DANS (Data Archiving and Networked Services)"},"instancetype":{"classid":"0017","classname":"Report","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":null}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282538754,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http%3A%2F%2Fservices.nod.dans.knaw.nl%2Foa-cerif","datestamp":"2019-12-01T08:28:51Z","harvestDate":"2020-05-25T11:38:50.591Z","identifier":"oai:services.nod.dans.knaw.nl:Publications/rce:document:550039885","metadataNamespace":""}},"originalId":["DansKnawCris::53842d77ea8c021a3ad5b401a8c7458b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550039885"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"urn","classname":"urn","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"http://cultureelerfgoed.adlibsoft.com/dispatcher.aspx?action=search&database=ChoiceRapporten&search=priref=550039885"}],"publisher":null,"relevantdate":[],"resourcetype":{"classid":"0017","classname":"0017","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"archeologie"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Archaeology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"None"}],"journal":null} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/relation new file mode 100644 index 000000000..3e8efe6f5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/relation @@ -0,0 +1,15 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"20|dedup_wf_001::4e6c928fef9851b37ec73f4f6daca35b"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::fbf7592ddbf2ad3cc0ed70c0f2e1d67c","subRelType":"provision","target":"20|dedup_wf_001::1b965e2c0c53e5526d269d63bcfa0ae6"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::fd4c399077127f0ba09b5205e2b78406","subRelType":"provision","target":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|infrastruct_::f66f1bd369679b5b077dcdf006089556","value":"OpenAIRE"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1594398578323,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|openaire____::8f991165fae922e29ad55d592f568464","subRelType":"provision","target":"50|openaire____::ec653e804967133b9436fdd30d3ff51d"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::15231a7ce4ba789d13b722cc5c955834","subRelType":"provision","target":"50|dedup_wf_001::1ea4bcb1bae8c6befef1e7f1230f0f10"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::16d11e9595188dbad0418a85f0351aba","subRelType":"provision","target":"40|opendoar____::041abd8c990fc531ab9bd2674a0e2725"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::46d3f6029f6170ebccb28945964d09bf","subRelType":"provision","target":"40|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::7501e5d4da87ac39d782741cd794002d","subRelType":"provision","target":"40|dedup_wf_001::04e2c34ef4daa411ff2497afc807b612"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1595258695262,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","target":"40|dedup_wf_001::02859c30f6c8bfbdd8c427068a6ec684"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/software b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/software new file mode 100644 index 000000000..bbaa85142 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/selectrelations/software @@ -0,0 +1,6 @@ +{"author":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"codeRepositoryUrl":null,"collectedfrom":[{"dataInfo":null,"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"dateofcollection":"","dateoftransformation":"2020-05-27T11:24:44.113Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Chilibot searches the PubMed literature database based on specific relationships between proteins, genes, or keywords. The results are returned as a graph."}],"documentationUrl":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.chilibot.net"}],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|opendoar____::16d11e9595188dbad0418a85f0351aba","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-05-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://bio.tools/chilibot"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282924188,"license":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbio.tools%2Fapi%2Ftool","datestamp":"","harvestDate":"2020-05-27T11:17:48.494Z","identifier":"","metadataNamespace":""}},"originalId":["__bioTools__::c17ebaca97f287b181090c9b4cba766e"],"pid":[],"programmingLanguage":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:programming_languages","schemename":"dnet:programming_languages"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"bio.tools"},"relevantdate":[],"resourcetype":{"classid":"Web application","classname":"Web application","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Natural language processing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Molecular interactions, pathways and networks"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Proteins"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Literature and language"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8739"},"qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"human activities"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Chilibot"}]} +{"author":[{"affiliation":[],"fullname":"Agustoni, Marco","name":"Marco","pid":[],"rank":1,"surname":"Agustoni"},{"affiliation":[],"fullname":"Marti, Lukas","name":"Lukas","pid":[],"rank":2,"surname":"Marti"},{"affiliation":[],"fullname":"ATLAS, Collaboration","name":"Collaboration","pid":[],"rank":3,"surname":"Atlas"},{"affiliation":[],"fullname":"Schneider, Basil","name":"Basil","pid":[],"rank":4,"surname":"Schneider"},{"affiliation":[],"fullname":"Gallo, Valentina","name":"Valentina","pid":[],"rank":5,"surname":"Gallo"},{"affiliation":[],"fullname":"Ereditato, Antonio","name":"Antonio","pid":[],"rank":6,"surname":"Ereditato"},{"affiliation":[],"fullname":"Sciacca, Gianfranco","name":"Gianfranco","pid":[],"rank":7,"surname":"Sciacca"},{"affiliation":[],"fullname":"Haug, Sigve","name":"Sigve","pid":[],"rank":8,"surname":"Haug"},{"affiliation":[],"fullname":"Kabana, Sonja","name":"Sonja","pid":[],"rank":9,"surname":"Kabana"},{"affiliation":[],"fullname":"Kruker, Tobias","name":"Tobias","pid":[],"rank":10,"surname":"Kruker"},{"affiliation":[],"fullname":"Ancu, Lucian","name":"Lucian","pid":[],"rank":11,"surname":"Ancu"},{"affiliation":[],"fullname":"Battaglia, Andreas","name":"Andreas","pid":[],"rank":12,"surname":"Battaglia"},{"affiliation":[],"fullname":"Beck, Hans Peter","name":"Hans Peter","pid":[],"rank":13,"surname":"Beck"},{"affiliation":[],"fullname":"Pretzl, Klaus-Peter","name":"Klaus-Peter","pid":[],"rank":14,"surname":"Pretzl"},{"affiliation":[],"fullname":"Borer, Claudia","name":"Claudia","pid":[],"rank":15,"surname":"Borer"},{"affiliation":[],"fullname":"Weber, Michael","name":"Michael","pid":[],"rank":16,"surname":"Weber"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"codeRepositoryUrl":null,"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"iis::document_research_initiative","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"egi::virtual::1"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"","dateoftransformation":"","description":[],"documentationUrl":[],"embargoenddate":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|opendoar____::7501e5d4da87ac39d782741cd794002d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.7892/boris.58468"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283098072,"license":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-10T21:56:26.167Z","identifier":"10.7892/boris.58468","metadataNamespace":""}},"originalId":["datacite____::04dd1f84f3a429ecf1e838afcd94cb3f"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.7892/boris.58468"}],"programmingLanguage":{"classid":"application/pdf","classname":"application/pdf","schemeid":"dnet:programming_languages","schemename":"dnet:programming_languages"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"EDP Sciences"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"530 Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Search for pair-produced massive coloured scalars in four-jet final states with the ATLAS detector in proton-proton collisions at sqrts=7 TeV"}]} +{"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'McGill University']"}],"fullname":"Regev, Mor","name":"Mor","pid":[],"rank":1,"surname":"Regev"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'Holon Institute of Technology']"}],"fullname":"Simony, Erez","name":"Erez","pid":[],"rank":2,"surname":"Simony"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'Princeton University']"}],"fullname":"Lee, Katherine","name":"Katherine","pid":[],"rank":3,"surname":"Lee"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'University of Minnesota']"}],"fullname":"Tan, Kean Ming","name":"Kean Ming","pid":[],"rank":4,"surname":"Tan"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'Johns Hopkins University']"}],"fullname":"Chen, Janice","name":"Janice","pid":[],"rank":5,"surname":"Chen"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'Princeton University']"}],"fullname":"Hasson, Uri","name":"Uri","pid":[],"rank":6,"surname":"Hasson"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"codeRepositoryUrl":null,"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"ni"}],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Regev, Mor"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This capsule demonstrates the inter-subject functional correlation (ISFC) analysis described in \"Propagation of information along the cortical hierarchy as a function of attention while reading and listening to stories \" by Regev, Simony, Lee, Tan, Chen and Hasson."}],"documentationUrl":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::c52707e6a6b63c5aeac022e62cc8cee7","value":"Code Ocean"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://opensource.org/licenses/MIT"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c","https://codeocean.com/2018/10/30/intersubject-functional-correlation-lpar-isfc-rpar-as-a-function-of-attention"]}],"language":{"classid":"en-us","classname":"en-us","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282729250,"license":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T05:02:43.976Z","identifier":"10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c","metadataNamespace":""}},"originalId":["datacite____::6b1e3a2fa60ed8c27317a66d6357f795"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c"}],"programmingLanguage":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:programming_languages","schemename":"dnet:programming_languages"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Code Ocean"},"relevantdate":[],"resourcetype":{"classid":"Capsule","classname":"Capsule","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Capsule"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"fmri"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"neuroscience"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"language"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"attention"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Intersubject functional correlation (ISFC) as a function of attention"}]} +{"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"University of Washington"}],"fullname":"Winebrenner, Dale","name":"Dale","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"ORCID","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0003-2341-1083"}],"rank":1,"surname":"Winebrenner"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"NASA Goddard Spaceflight Center"}],"fullname":"MacGregor, Joseph","name":"Joseph","pid":[],"rank":2,"surname":"Macgregor"},{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"University of Washington"}],"fullname":"Kintner, Paul","name":"Paul","pid":[],"rank":3,"surname":"Kintner"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"codeRepositoryUrl":null,"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"2018-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Collection of Matlab scripts and data files to implement and exercise data analysis and modeling in support of the paper "New Estimates of Ice and Oxygen Fluxes Across the Entire Lid of Lake Vostok from Observations of Englacial Radiowave Attenuation", in review at the Journal of Geophysical Research - Earth Surface, October 2018. 

"}],"documentationUrl":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1467433","http://dx.doi.org/10.5281/zenodo.1467432"]},{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://figshare.com/articles/Lake_Vostok_radar_attenuation_and_basal_accretion_data_models_and_computations/7246043","http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/1467433","http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by/4.0/legalcode"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.1467433"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591291157172,"license":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-10T17:17:45.749Z","identifier":"10.5281/zenodo.1467432","metadataNamespace":""}},"originalId":["datacite____::6ad543d8217b00d3c6e96f0b0a310d9f","r37980778c78::1a67aade293d5f0c985073a3470fdd9c","datacite____::51f29f85c008323ab696ef1c51ab242c","r37b0ad08687::213cee42e7f14a78806470a35a09f87a","od______2659::213cee42e7f14a78806470a35a09f87a"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1467432"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1467433"}],"programmingLanguage":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:programming_languages","schemename":"dnet:programming_languages"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Zenodo"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Lake Vostok, radioglaciology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Cancer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Inorganic Chemistry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"59999 Environmental Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"69999 Biological Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"80699 Information Systems not elsewhere classified"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lake Vostok Radar Attenuation And Basal Accretion Data, Models And Computations"}]} +{"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Laboratory of Psychology and NeuroCognition – CNRS/UGA, Grenoble, France"}],"fullname":"Laboissière, Rafael","name":"Rafael","pid":[],"rank":1,"surname":"Laboissière"}],"bestaccessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"codeRepositoryUrl":null,"collectedfrom":[{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"2017-01-01"},"dateofcollection":"","dateoftransformation":"2019-09-17T18:42:48.815Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This release contains the raw data, as well as the pre-processing and statistical analysis scripts (in R) for the experiments of perception of object stability under vection (illusion of self movement).

"}],"documentationUrl":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69","instance":[{"accessright":{"classid":"OPEN SOURCE","classname":"Open Source","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.opensource.org/licenses/GPL-3.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.801400"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.opensource.org/licenses/GPL-3.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.801400","https://zenodo.org/record/801400"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.801400"]},{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.801400","https://figshare.com/articles/rlaboiss_vextab-data_Raw_data_and_statistical_analysis_code_for_the_vextab_study/11470608"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.opensource.org/licenses/GPL-3.0"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.801399","https://zenodo.org/record/801400"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591291157172,"license":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https://zenodo.org/oai2d","datestamp":"2017-11-02T13:39:51Z","harvestDate":"2019-01-08T12:47:18.347Z","identifier":"oai:zenodo.org:801400","metadataNamespace":""}},"originalId":["r37b0ad08687::25acd09277dae71c80810366a599421c","datacite____::b353952b541b80dfed37f4e48b86e4c2","od______2659::25acd09277dae71c80810366a599421c","r37980778c78::a02038e6624d01df9cf22cb709d7fe92","datacite____::c2c7b341f563b61294ec4c7396a3f984"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.801400"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.801399"}],"programmingLanguage":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:programming_languages","schemename":"dnet:programming_languages"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Zenodo"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"perceived object stability, vection, gravity perception, illusory body tilt, frames of reference"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Sociology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Immunology"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Inorganic Chemistry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"69999 Biological Sciences not elsewhere classified"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"rlaboiss/vextab-data: Raw data and statistical analysis code for the vextab study"}]} +{"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"George Washington University"}],"fullname":"Gaberial Campese","name":"","pid":[],"rank":1,"surname":""}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"codeRepositoryUrl":null,"collectedfrom":[{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":null}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"decisiontree-dedup-test","inferred":true,"invisible":false,"provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8"},"dateofacceptance":{"dataInfo":null,"value":"2019-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

A visualization project that researches trends in military, healthcare, and education expenditures by government. This project uses the Google Charts API.

"}],"documentationUrl":[],"embargoenddate":null,"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by/4.0/legalcode"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/3490231","http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://figshare.com/articles/Military_Healthcare_and_Education_Visualization_Project/11451567","http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by/4.0/legalcode"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.3490167","https://zenodo.org/record/3490167"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by/4.0/legalcode"},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["https://zenodo.org/record/3490231","http://dx.doi.org/10.5281/zenodo.3490166"]},{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.3490167","https://figshare.com/articles/Military_Healthcare_and_Education_Visualization_Project/11536080"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"dataInfo":null,"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"url":["http://dx.doi.org/10.5281/zenodo.3490167"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591291157172,"license":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T01:01:41.92Z","identifier":"10.5281/zenodo.3490231","metadataNamespace":""}},"originalId":["datacite____::fc293ea8b1058dc6adaa87ad19a01123","od______2659::ea8c041f18c6201e4f7cd2764b408d20","r37980778c78::a1412e0bb17acd68cd6604c22386a988","datacite____::d8408df60f8d9b703e500ca210de53cb","datacite____::42607e67f94a1d82aebb02eeb7a69d7d","r37980778c78::2dd6d1ca777a3ca6e5f253362dcf1968","od______2659::c27f64126331e6a4a0a82a17aef4b48e"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.3490231"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.3490167"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.3490166"}],"programmingLanguage":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:programming_languages","schemename":"dnet:programming_languages"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Zenodo"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Medicine"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Neuroscience"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Cancer"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"111714 Mental Health"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"19999 Mathematical Sciences not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"80699 Information Systems not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"trend"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"healthcare"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Education Visualization Project"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Google Charts API"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"education expenditures"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Military"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"visualization project"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_classes","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7542"},"qualifier":{"classid":"acm","classname":"acm","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"ComputingMilieux_LEGALASPECTSOFCOMPUTING"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Military, Healthcare, and Education Visualization Project"}]} \ No newline at end of file From e9a17ec8997099eaf391f30f00bc0fe4c8c95e61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 15:53:35 +0200 Subject: [PATCH 023/287] added check to verify not to add void APC --- .../eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 1a4e7c42a..47baee30d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -426,10 +426,15 @@ public class ResultMapper implements Serializable { Optional> oPca = Optional.ofNullable(i.getProcessingchargeamount()); Optional> oPcc = Optional.ofNullable(i.getProcessingchargecurrency()); if (oPca.isPresent() && oPcc.isPresent()) { - APC apc = new APC(); - apc.setCurrency(oPcc.get().getValue()); - apc.setAmount(oPca.get().getValue()); - instance.setArticleprocessingcharge(apc); + Field pca = oPca.get(); + Field pcc = oPcc.get(); + if(!pca.getValue().trim().equals("") && !pcc.getValue().trim().equals("")){ + APC apc = new APC(); + apc.setCurrency(oPcc.get().getValue()); + apc.setAmount(oPca.get().getValue()); + instance.setArticleprocessingcharge(apc); + } + } } From f13e11e3f761c2835695ec21e698d99c454249d5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 16:04:02 +0200 Subject: [PATCH 024/287] [aggregation] datacite wf: defined parameter declaring the path used to store the OAF objects produced by the transformation phase --- .../dhp/actionmanager/datacite/oozie_app/workflow.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 3e0c2bba6..69fc95957 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -4,6 +4,10 @@ mainPath the working path of Datacite stores + + oafTargetPath + the target path where the OAF records are stored + isLookupUrl The IS lookUp service endopoint @@ -56,7 +60,7 @@ --masteryarn-cluster --blocksize${blocksize} - +
@@ -78,7 +82,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${mainPath}/datacite_dump - --targetPath${mainPath}/datacite_oaf + --targetPath${oafTargetPath} --isLookupUrl${isLookupUrl} --exportLinks${exportLinks} --masteryarn-cluster From 5295d106914ca9bbe4e0b34e9689f5dcd45b0159 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 16:11:46 +0200 Subject: [PATCH 025/287] added check not to dump deletedByInference entities --- .../dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java | 3 +++ .../oa/graph/dump/complete/SparkSelectValidRelationsJob.java | 4 +++- .../dump/complete/DumpOrganizationProjectDatasourceTest.java | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 36ced3a4a..530f7a003 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -453,6 +453,7 @@ public class DumpGraphEntities implements Serializable { .map( (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class)) + .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -460,6 +461,8 @@ public class DumpGraphEntities implements Serializable { } private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) { + if(org.getDataInfo().getDeletedbyinference()) + return null; Organization organization = new Organization(); Optional diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index 8b477b34d..77402ea1b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -110,9 +110,11 @@ public class SparkSelectValidRelationsJob implements Serializable { "UNION ALL " + "SELECT id " + "FROM project " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + "UNION ALL " + "SELECT id " + - "FROM datasource ") + "FROM datasource " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " ) .createOrReplaceTempView("identifiers"); spark diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java index 69100a114..89ecdfb2b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java @@ -88,7 +88,7 @@ public class DumpOrganizationProjectDatasourceTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Organization.class)); - Assertions.assertEquals(34, verificationDataset.count()); + Assertions.assertEquals(15, verificationDataset.count()); verificationDataset .foreach( From 72397ea1ba4c03f306f49d5e2fe522f0503deb05 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 16:18:35 +0200 Subject: [PATCH 026/287] Added fix for community of arbitrary name length --- .../main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index d20a3036e..f5b8f1c76 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -89,7 +89,7 @@ public class DumpProducts implements Serializable { return c.getId(); } if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) { - return c.getId().substring(0, 3); + return c.getId().substring(0, c.getId().indexOf("::")); } return null; }).filter(Objects::nonNull).collect(Collectors.toList()); From 8f322a73cba044338329eecfc00cfa38bf5eaeca Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 16:22:58 +0200 Subject: [PATCH 027/287] change because of the renaming of originalId in acronym --- .../java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 31d105b66..a6757d18d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -35,7 +35,7 @@ public class Process implements Serializable { ri.setType(Constants.RESEARCH_INFRASTRUCTURE); } ri.setId(Utils.getContextId(ci.getId())); - ri.setOriginalId(ci.getId()); + ri.setAcronym(ci.getId()); ri.setDescription(ci.getDescription()); ri.setName(ci.getName()); From fa720c1da44c78ea08784a32c44b71a3899f349f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 16:59:30 +0200 Subject: [PATCH 028/287] [doiboost] added workflow for the ActionSet update dedicated to production --- .../process/oozie_app/config-default.xml | 42 +++ .../doiboost/process/oozie_app/workflow.xml | 259 ++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml new file mode 100644 index 000000000..508202e30 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml new file mode 100644 index 000000000..cf1cd97f7 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -0,0 +1,259 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorIntersectionMemory + memory for individual executor + + + + sparkExecutorCores + number of cores used by single executor + + + + workingPath + the working Path + + + + hostedByMapPath + the hostedByMap Path + + + outputPath + the Path of the sequence file action set + + + + + + inputPathCrossref + the Crossref input path + + + + + inputPathMAG + the MAG working path + + + + + + inputPathUnpayWall + the UnpayWall working path + + + + + workingPathOrcid + the ORCID working path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('resumeFrom') eq 'PreprocessMag'} + ${wf:conf('resumeFrom') eq 'PreprocessUW'} + ${wf:conf('resumeFrom') eq 'ProcessORCID'} + ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} + ${wf:conf('resumeFrom') eq 'GenerateActionSet'} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + ConvertCrossrefToOAF + eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathCrossref}/crossref_ds + --targetPath${workingPath} + --masteryarn-cluster + + + + + + + + yarn-cluster + cluster + Convert Mag to OAF Dataset + eu.dnetlib.doiboost.mag.SparkProcessMAG + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorIntersectionMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathMAG}/dataset + --workingPath${inputPathMAG}/process_p + --targetPath${workingPath} + --masteryarn-cluster + + + + + + + + + + yarn-cluster + cluster + Convert UnpayWall to Dataset + eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathUnpayWall}/uw_extracted + --targetPath${workingPath}/uwPublication + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Convert ORCID to Dataset + eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --workingPath${workingPathOrcid} + --targetPath${workingPath}/orcidPublication + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Create DOIBoost Infospace + eu.dnetlib.doiboost.SparkGenerateDoiBoost + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorIntersectionMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --hostedByMapPath${hostedByMapPath} + --affiliationPath${inputPathMAG}/dataset/Affiliations + --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations + --workingPath${workingPath} + --masteryarn-cluster + + + +
+ + + + + yarn-cluster + cluster + Generate DOIBoost ActionSet + eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --dbPublicationPath${workingPath}/doiBoostPublicationFiltered + --dbDatasetPath${workingPath}/crossrefDataset + --crossRefRelation${workingPath}/crossrefRelation + --dbaffiliationRelationPath${workingPath}/doiBoostPublicationAffiliation + --dbOrganizationPath${workingPath}/doiBoostOrganization + --targetPath${workingPath}/actionDataSet + --sFilePath${outputPath} + --masteryarn-cluster + + + + + + +
\ No newline at end of file From 084b4ef999eccd94a2c16eaffb372c2cf87c6950 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 17:07:46 +0200 Subject: [PATCH 029/287] added the creation of the openaireId from funder and grant number if the element is not present in the context profile --- .../dump/complete/QueryInformationSystem.java | 68 +++++++++++++++++-- 1 file changed, 63 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index c33a693a5..7a69c1a0d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -14,6 +14,7 @@ import org.jetbrains.annotations.NotNull; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.dhp.utils.DHPUtils; public class QueryInformationSystem { @@ -113,14 +114,71 @@ public class QueryInformationSystem { @NotNull private List getCategoryList(Element el, String prefix) { List datasourceList = new ArrayList<>(); - for (Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - if (n.valueOf("./@name").equals("openaireId")) { - datasourceList.add(prefix + "|" + n.getText()); - } + for (Object node : el.selectNodes(".//concept")) { + String oid = getOpenaireId((Node) node, prefix); + if (oid != null) + datasourceList.add(oid); } return datasourceList; } + private String getOpenaireId(Node el, String prefix) { + for (Object node : el.selectNodes(".//param")) { + Node n = (Node) node; + if (n.valueOf("./@name").equals("openaireId")) { + return prefix + "|" + n.getText(); + } + } + + return makeOpenaireId(el, prefix); + + } + + private String makeOpenaireId(Node el, String prefix) { + String funder = null; + String grantId = null; + String funding = null; + for (Object node : el.selectNodes(".//param")) { + Node n = (Node) node; + switch (n.valueOf("./@name")) { + case "funding": + funding = n.getText(); + break; + case "funder": + funder = n.getText(); + break; + case "CD_PROJECT_NUMBER": + grantId = n.getText(); + break; + } + } + String nsp = null; + switch (funder.toLowerCase()) { + case "ec": + if (funding == null) { + return null; + } + if (funding.toLowerCase().startsWith("h2020")) { + nsp = "corda__h2020::"; + } else { + nsp = "corda_______::"; + } + break; + case "tubitak": + nsp = "tubitakf____::"; + break; + case "dfg": + nsp = "dfgf________::"; + break; + default: + nsp = funder.toLowerCase(); + for (int i = funder.length(); i < 12; i++) + nsp += "_"; + nsp += "::"; + } + + return prefix + "|" + nsp + DHPUtils.md5(grantId); + } + } From 59615da65e1a6984e82d23dff5b860800d5939f6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 17:09:15 +0200 Subject: [PATCH 030/287] Add test to verify the creation of relation between context and projects --- .../dump/complete/CreateRelationTest.java | 152 +++++++++++++++++- 1 file changed, 150 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index b556fa2d6..5f21e0bc5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; + +import eu.dnetlib.dhp.schema.oaf.Project; import java.util.*; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -203,6 +205,7 @@ public class CreateRelationTest { " \n" + " oac_ni\n" + " 2018-03-01T12:00:00\n" + + " \n" + " \n" + " \n" + " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" + @@ -437,8 +440,58 @@ public class CreateRelationTest { " \n" + " oaa_elixir-gr\n" + " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + " \n" + + " \n" + + " BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)\n" + + " \n" + + " 1U24RR025736-01\n" + + " NIH\n" + + " \n" + + " \n" + + " COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning\n" + + " \n" + + " 0223843\n" + + " NSF\n" + + " \n" + + " \n" + + " The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia\n" + + " \n" + + " 5R01MH082795-05\n" + + " NIH\n" + + " \n" + + " \n" + + " Fragmented early life environmental and emotional / cognitive vulnerabilities\n" + + " \n" + + " 1P50MH096889-01A1\n" + + " NIH\n" + + " \n" + + " \n" + + " Enhancement of the 1000 Functional Connectome Project\n" + + " \n" + + " 1R03MH096321-01A1\n" + + " TUBITAK\n" + + " \n" + + " \n" + + " CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project\n" + + " \n" + + " 1131441\n" + + " NSF\n" + + " \n" + + " \n" + + " Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI\n" + + " \n" + + " 0121950\n" + + " NSF\n" + + " \n" + + " \n" + + " Transforming statistical methodology for neuroimaging meta-analysis.\n" + + " \n" + + " 100309\n" + + " WT\n" + + " \n" + + " " + + + " \n" + " \n" + " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" + @@ -566,4 +619,99 @@ public class CreateRelationTest { tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05")); } + + @Test + public void test2() { + List cInfoList = new ArrayList<>(); + final Consumer consumer = ci -> cInfoList.add(ci); + + queryInformationSystem + .getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class)); + + cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); + + + List rList = new ArrayList<>(); + + cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); + + Assertions.assertEquals(44 , rList.size()); + + Assertions + .assertFalse( + rList + .stream() + .map(r -> r.getSource().getId()) + .collect(Collectors.toSet()) + .contains( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("dh-ch")))); + + Assertions + .assertEquals( + 2, + rList + .stream() + .filter( + r -> r + .getSource() + .getId() + .equals( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("clarin")))) + .collect(Collectors.toList()) + .size()); + + Assertions + .assertEquals( + 2, + rList + .stream() + .filter( + r -> r + .getTarget() + .getId() + .equals( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("clarin")))) + .collect(Collectors.toList()) + .size()); + + Set tmp = rList + .stream() + .filter( + r -> r + .getSource() + .getId() + .equals( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("clarin")))) + .map(r -> r.getTarget().getId()) + .collect(Collectors.toSet()); + + Assertions + .assertTrue( + tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") && + tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4") ); + + rList.forEach(rel -> { + if (rel.getSource().getId().startsWith("40|")){ + String proj = rel.getSource().getId().substring(3); + Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12); + } + }); + + } } From 618d2de2da0b9749b5eca6097faee25706d08956 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 17:10:02 +0200 Subject: [PATCH 031/287] minor changes and refactoring --- .../dhp/oa/graph/dump/ResultMapper.java | 2 +- .../dump/complete/CreateContextRelation.java | 8 +- .../dump/complete/DumpGraphEntities.java | 4 +- .../dhp/oa/graph/dump/complete/Extractor.java | 4 +- .../SparkSelectValidRelationsJob.java | 188 +++++++++--------- .../dhp/oa/graph/dump/DumpJobTest.java | 36 ++-- .../graph/dump/complete/CreateEntityTest.java | 10 +- .../dump/complete/SelectRelationTest.java | 99 ++++----- 8 files changed, 176 insertions(+), 175 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 47baee30d..fe9b4e443 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -428,7 +428,7 @@ public class ResultMapper implements Serializable { if (oPca.isPresent() && oPcc.isPresent()) { Field pca = oPca.get(); Field pcc = oPcc.get(); - if(!pca.getValue().trim().equals("") && !pcc.getValue().trim().equals("")){ + if (!pca.getValue().trim().equals("") && !pcc.getValue().trim().equals("")) { APC apc = new APC(); apc.setCurrency(oPcc.get().getValue()); apc.setAmount(oPca.get().getValue()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 102406315..59f660525 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -70,10 +70,10 @@ public class CreateContextRelation implements Serializable { cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); log.info("Creating relations for projects... "); -// cce -// .execute( -// Process::getRelation, CONTEX_RELATION_PROJECT, -// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); + cce + .execute( + Process::getRelation, CONTEX_RELATION_PROJECT, + ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); cce.close(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 530f7a003..b843ec365 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -453,7 +453,7 @@ public class DumpGraphEntities implements Serializable { .map( (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class)) - .filter(Objects::nonNull) + .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -461,7 +461,7 @@ public class DumpGraphEntities implements Serializable { } private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) { - if(org.getDataInfo().getDeletedbyinference()) + if (org.getDataInfo().getDeletedbyinference()) return null; Organization organization = new Organization(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java index 31886d1b1..f76f92013 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java @@ -129,7 +129,7 @@ public class Extractor implements Serializable { return relationList.iterator(); }, Encoders.bean(Relation.class)) - .write() + .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) .json(outputPath); @@ -147,7 +147,7 @@ public class Extractor implements Serializable { .map( paction -> Provenance .newInstance( - paction.getClassid(), + paction.getClassname(), dinfo.getTrust())) .orElse( Provenance diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index 77402ea1b..e729e13df 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -25,113 +26,112 @@ import eu.dnetlib.dhp.schema.oaf.*; * with this view for both the source and the target */ - public class SparkSelectValidRelationsJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class); + private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class); - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkSelectValidRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkSelectValidRelationsJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); - SparkConf conf = new SparkConf(); + SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - selectValidRelation(spark, inputPath, outputPath); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + selectValidRelation(spark, inputPath, outputPath); - }); + }); - } + } - private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) { - Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class); - Dataset publication = Utils.readPath(spark, inputPath + "/publication", Publication.class); - Dataset dataset = Utils - .readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class); - Dataset software = Utils.readPath(spark, inputPath + "/software", Software.class); - Dataset other = Utils - .readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class); - Dataset organization = Utils.readPath(spark, inputPath + "/organization", Organization.class); - Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); - Dataset datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class); + private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) { + Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class); + Dataset publication = Utils.readPath(spark, inputPath + "/publication", Publication.class); + Dataset dataset = Utils + .readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class); + Dataset software = Utils.readPath(spark, inputPath + "/software", Software.class); + Dataset other = Utils + .readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class); + Dataset organization = Utils.readPath(spark, inputPath + "/organization", Organization.class); + Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); + Dataset datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class); - relation.createOrReplaceTempView("relation"); - publication.createOrReplaceTempView("publication"); - dataset.createOrReplaceTempView("dataset"); - other.createOrReplaceTempView("other"); - software.createOrReplaceTempView("software"); - organization.createOrReplaceTempView("organization"); - project.createOrReplaceTempView("project"); - datasource.createOrReplaceTempView("datasource"); + relation.createOrReplaceTempView("relation"); + publication.createOrReplaceTempView("publication"); + dataset.createOrReplaceTempView("dataset"); + other.createOrReplaceTempView("other"); + software.createOrReplaceTempView("software"); + organization.createOrReplaceTempView("organization"); + project.createOrReplaceTempView("project"); + datasource.createOrReplaceTempView("datasource"); - spark - .sql( - "SELECT id " + - "FROM publication " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM dataset " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM other " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM software " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM organization " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM project " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM datasource " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " ) - .createOrReplaceTempView("identifiers"); + spark + .sql( + "SELECT id " + + "FROM publication " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM dataset " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM other " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM software " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM organization " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM project " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + + "UNION ALL " + + "SELECT id " + + "FROM datasource " + + "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false ") + .createOrReplaceTempView("identifiers"); - spark - .sql( - "SELECT relation.* " + - "FROM relation " + - "JOIN identifiers i1 " + - "ON source = i1.id " + - "JOIN identifiers i2 " + - "ON target = i2.id " + - "WHERE datainfo.deletedbyinference = false") - .as(Encoders.bean(Relation.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - ; + spark + .sql( + "SELECT relation.* " + + "FROM relation " + + "JOIN identifiers i1 " + + "ON source = i1.id " + + "JOIN identifiers i2 " + + "ON target = i2.id " + + "WHERE datainfo.deletedbyinference = false") + .as(Encoders.bean(Relation.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); + ; - } -} \ No newline at end of file + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index f0842c844..6b64a4124 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -412,40 +412,42 @@ public class DumpJobTest { @Test public void testArticlePCA() { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca") + .getPath(); final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); DumpProducts dump = new DumpProducts(); dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + .run( + // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(23, verificationDataset.count()); - //verificationDataset.show(false); + // verificationDataset.show(false); Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count()); verificationDataset.createOrReplaceTempView("check"); - org.apache.spark.sql.Dataset temp = spark.sql("select id " + - "from check " + - "lateral view explode (instance) i as inst " + - "where inst.articleprocessingcharge is not null"); + org.apache.spark.sql.Dataset temp = spark + .sql( + "select id " + + "from check " + + "lateral view explode (instance) i as inst " + + "where inst.articleprocessingcharge is not null"); Assertions.assertTrue(temp.count() == 2); @@ -453,8 +455,6 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); - - // verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset // .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") // .count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 3ecbd1894..679c09393 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -97,7 +97,7 @@ public class CreateEntityTest { Assertions.assertEquals(12, riList.size()); riList.stream().forEach(c -> { - switch (c.getOriginalId()) { + switch (c.getAcronym()) { case "mes": Assertions .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY)); @@ -115,9 +115,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); - Assertions.assertTrue("mes".equals(c.getOriginalId())); + Assertions.assertTrue("mes".equals(c.getAcronym())); break; case "clarin": Assertions @@ -130,9 +130,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); - Assertions.assertTrue("clarin".equals(c.getOriginalId())); + Assertions.assertTrue("clarin".equals(c.getAcronym())); break; } // TODO add check for all the others Entities diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java index 248bd7256..3e4ab93c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.graph.dump.complete; import java.io.IOException; @@ -28,72 +29,72 @@ import net.sf.saxon.expr.instruct.ForEach; public class SelectRelationTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static final Logger log = LoggerFactory - .getLogger(SelectRelationTest.class); + private static final Logger log = LoggerFactory + .getLogger(SelectRelationTest.class); - private static HashMap map = new HashMap<>(); + private static HashMap map = new HashMap<>(); - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(SelectRelationTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(SelectRelationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); - conf.setAppName(SelectRelationTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(SelectRelationTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(SelectRelationTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(SelectRelationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - public void test1() throws Exception { + @Test + public void test1() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations") - .getPath(); + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations") + .getPath(); - SparkSelectValidRelationsJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath - }); + SparkSelectValidRelationsJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath + }); // dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.oaf.Relation.class)); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.oaf.Relation.class)); - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.oaf.Relation.class)); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.oaf.Relation.class)); - Assertions.assertTrue(verificationDataset.count() == 7); + Assertions.assertTrue(verificationDataset.count() == 7); - } + } -} \ No newline at end of file +} From d418c309f57518a7e1d25fb45c36b145b4dac902 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 17:11:49 +0200 Subject: [PATCH 032/287] removed the part after part-x- in the file name generated by spark. It was too long and created problems while creating the tar entries --- .../main/java/eu/dnetlib/dhp/common/MakeTarArchive.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 7dc0e4417..518dfefe0 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -90,6 +90,13 @@ public class MakeTarArchive implements Serializable { String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { String name = p_string.substring(p_string.lastIndexOf("/") + 1); + if (name.startsWith("part-") & name.length() > 10) { + String tmp = name.substring(0, 10); + if (name.contains(".")) { + tmp += name.substring(name.indexOf(".")); + } + name = tmp; + } TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name); entry.setSize(fileStatus.getLen()); current_size += fileStatus.getLen(); From 734de62474089e7111debe5b39be0b1da05c8a43 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 17:26:04 +0200 Subject: [PATCH 033/287] [doiboost] added workflow for the ActionSet update dedicated to production --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index cf1cd97f7..f845d97f3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -106,8 +106,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${workingPath} + --targetPath${workingPath} --masteryarn-cluster @@ -195,8 +194,7 @@ - + yarn-cluster cluster From 5e38c7f42da63cd2be6078ea1b443eecf5944fb2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 17:32:38 +0200 Subject: [PATCH 034/287] dumping only communities with status all --- .../eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d118accba..fe4036925 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -20,7 +20,7 @@ public class QueryInformationSystem { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') " + " and ($x//context/param[./@name = 'status']/text() = 'all') " + " return " + " " + From d70f8c96fdb6bde8b5b2e1e4fb74a86beae80276 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 17:34:53 +0200 Subject: [PATCH 035/287] funding contains and not starts with h2020 --- .../dhp/oa/graph/dump/complete/QueryInformationSystem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index 7a69c1a0d..3736b7380 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -159,7 +159,7 @@ public class QueryInformationSystem { if (funding == null) { return null; } - if (funding.toLowerCase().startsWith("h2020")) { + if (funding.toLowerCase().contains("h2020")) { nsp = "corda__h2020::"; } else { nsp = "corda_______::"; From c028feef4f5664002a8733bfac072a6652889d93 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:06:44 +0200 Subject: [PATCH 036/287] workflow for the dump as sub workflows --- .../dump/wf/main/oozie_app/config-default.xml | 30 + .../graph/dump/wf/main/oozie_app/import.txt | 4 + .../graph/dump/wf/main/oozie_app/workflow.xml | 306 ++++++++++ .../oozie_app/config-default.xml | 30 + .../oozie_app/workflow.xml | 347 +++++++++++ .../community/oozie_app/config-default.xml | 30 + .../community/oozie_app/import.txt | 2 + .../community/oozie_app/workflow.xml | 145 +++++ .../complete/oozie_app/config-default.xml | 30 + .../complete/oozie_app/workflow.xml | 537 ++++++++++++++++++ .../funder/oozie_app/config-default.xml | 0 .../subworkflows/funder/oozie_app/import.txt | 2 + .../funder/oozie_app/workflow.xml | 256 +++++++++ 13 files changed, 1719 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/import.txt create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/import.txt create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt new file mode 100644 index 000000000..bf55947d1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt @@ -0,0 +1,4 @@ +## This is a classpath-based import file (this header is required) +dump_complete classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app +dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app +dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml new file mode 100644 index 000000000..2a612dec5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml @@ -0,0 +1,306 @@ + + + + + singleDeposition + Indicates if it is a single community deposition + + + communityId + the id of the community to be dumped if a dump for a single community should be done + + + dumpType + the type of the dump one of {complete, community, funder} + + + onlyUpload + true if the dump is already done and should only be upload in zenodo + + + upload + true if the dump should be upload in zenodo + + + sourcePath + the source path + + + isLookUpUrl + the isLookup service endpoint + + + outputPath + the output path + + + resultAggregation + true if all the result type have to be dumped under result. false otherwise + + + accessToken + the access token used for the deposition in Zenodo + + + connectionUrl + the connection url for Zenodo + + + metadata + the metadata associated to the deposition + + + depositionType + the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) + + + conceptRecordId + for new version, the id of the record for the old deposition + + + depositionId + the depositionId of a deposition open that has to be added content + + + organizationCommunityMap + the organization community map + + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('onlyUpload') eq true} + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap + --outputPath${workingDir}/communityMap + --nameNode${nameNode} + --isLookUpUrl${isLookUpUrl} + --singleDeposition${singleDeposition} + --communityId${communityId} + + + + + + + + ${wf:conf('dumpType') eq "funder"} + ${wf:conf('dumpType') eq "community"} + + + + + + + + ${wf:appPath()}/dump_complete + + + + + communityMapPath + ${workingDir}/communityMap + + + outputPath + ${workingDir}/tar + + + sourcePath + ${sourcePath} + + + organizationCommunityMap + ${organizationCommunityMap} + + + isLookUpUrl + ${isLookUpUrl} + + + resultAggregation + ${resultAggregation} + + + + + + + + + + + ${wf:appPath()}/dump_community + + + + + sourcePath + ${sourcePath} + + + communityMapPath + ${workingDir}/communityMap + + + outputPath + ${workingDir}/tar + + + + + + + + + + ${wf:appPath()}/dump_funder + + + + + communityMapPath + ${workingDir}/communityMap + + + outputPath + ${workingDir}/tar + + + sourcePath + ${sourcePath} + + + dumpType + ${dumpType} + + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar + --hdfsPath${outputPath} + --nameNode${nameNode} + --sourcePath${workingDir}/tar + + + + + + + + ${wf:conf('upload') eq true} + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS + --hdfsPath${outputPath} + --nameNode${nameNode} + --accessToken${accessToken} + --connectionUrl${connectionUrl} + --metadata${metadata} + --conceptRecordId${conceptRecordId} + --depositionType${depositionType} + --depositionId${depositionId} + + + + + + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/workflow.xml new file mode 100644 index 000000000..5f954ff38 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/workflow.xml @@ -0,0 +1,347 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + communityMapPath + the path to the community map + + + selectedResults + the path the the possible subset ot results to be dumped + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + yarn + cluster + Dump table publication for community/funder related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${selectedResults}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/dump/publication + --communityMapPath${communityMapPath} + --dumpType${dumpType} + + + + + + + + yarn + cluster + Dump table dataset for community/funder related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${selectedResults}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/dump/dataset + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table ORP for community related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${selectedResults}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/dump/otherresearchproduct + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table software for community related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${selectedResults}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/dump/software + --communityMapPath${communityMapPath} + + + + + + + + + + yarn + cluster + Prepare association result subset of project info + eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --outputPath${workingDir}/preparedInfo + + + + + + + + + + + + + + + yarn + cluster + Extend dumped publications with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/publication + --outputPath${outputPath}/ext/publication + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + yarn + cluster + Extend dumped dataset with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/dataset + --outputPath${outputPath}/ext/dataset + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + yarn + cluster + Extend dumped ORP with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/otherresearchproduct + --outputPath${outputPath}/ext/orp + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + yarn + cluster + Extend dumped software with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/software + --outputPath${outputPath}/ext/software + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/import.txt b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/import.txt new file mode 100644 index 000000000..15387e988 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/import.txt @@ -0,0 +1,2 @@ +## This is a classpath-based import file (this header is required) +dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml new file mode 100644 index 000000000..319517026 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml @@ -0,0 +1,145 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:appPath()}/dump_common + + + + + sourcePath + ${sourcePath} + + + selectedResults + ${sourcePath} + + + communityMapPath + ${workingDir}/communityMap + + + outputPath + ${workingDir} + + + + + + + + + + + + yarn + cluster + Split dumped result for community + eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/ext + --outputPath${outputPath} + --communityMapPath${communityMapPath} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml new file mode 100644 index 000000000..f84ab7e1a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -0,0 +1,537 @@ + + + + sourcePath + the source path + + + outputPath + the output path + + + resultAggregation + true if all the result type have to be dumped under result. false otherwise + + + organizationCommunityMap + the organization community map + + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + + + + + + + + + + + + + + + yarn + cluster + Dump table publication + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/result/publication + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table dataset + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/result/dataset + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table ORP + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/result/otherresearchproduct + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table software + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/result/software + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table organization + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/organization + --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization + --outputPath${outputPath}/organization + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table project + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/project + --resultTableNameeu.dnetlib.dhp.schema.oaf.Project + --outputPath${outputPath}/project + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table datasource + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/datasource + --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource + --outputPath${outputPath}/datasource + --communityMapPath${workingDir}/communityMap + + + + + + + + yarn + cluster + Select valid table relation + eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --outputPath${workingDir}/validrelation + + + + + + + + yarn + cluster + Dump table relation + eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/validrelation + --outputPath${workingDir}/relation/relation + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities + --hdfsPath${outputPath}/communities_infrastructures/communities_infrastructure.json.gz + --nameNode${nameNode} + --isLookUpUrl${isLookUpUrl} + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation + --hdfsPath${workingDir}/relation/context + --nameNode${nameNode} + --isLookUpUrl${isLookUpUrl} + + + + + + + + yarn + cluster + Dump table relation + eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/relation + --outputPath${workingDir}/relation/contextOrg + --organizationCommunityMap${organizationCommunityMap} + --communityMapPath${communityMapPath} + + + + + + + + + + + + + + + + + yarn + cluster + Extract Relations from publication + eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/relation/publication + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table dataset + eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/relation/dataset + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table ORP + eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/relation/orp + --communityMapPath${communityMapPath} + + + + + + + + yarn + cluster + Dump table software + eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/relation/software + --communityMapPath${communityMapPath} + + + + + + + + + + yarn + cluster + Collect Results and Relations and put them in the right path + eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir} + --outputPath${outputPath} + --resultAggregation${resultAggregation} + + + + + + + + + Sub-workflow dump complete failed with error message ${wf:errorMessage()} + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/import.txt b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/import.txt new file mode 100644 index 000000000..15387e988 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/import.txt @@ -0,0 +1,2 @@ +## This is a classpath-based import file (this header is required) +dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml new file mode 100644 index 000000000..b990f4e49 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml @@ -0,0 +1,256 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/result/publication + --graphPath${sourcePath} + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/result/dataset + --graphPath${sourcePath} + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/result/otherresearchproduct + --graphPath${sourcePath} + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/result/software + --graphPath${sourcePath} + + + + + + + + + + ${wf:appPath()}/dump_common + + + + + sourcePath + ${sourcePath} + + + selectedResults + ${workingDir}/result + + + communityMapPath + ${workingDir}/communityMap + + + outputPath + ${workingDir} + + + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/ext + --outputPath${outputPath} + --graphPath${sourcePath} + + + + + + + + + \ No newline at end of file From eae10c58943a6c0d7797facb69531dc442ae5b30 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:07:25 +0200 Subject: [PATCH 037/287] modification to allow the dump for a single community --- .../oa/graph/dump/QueryInformationSystem.java | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index fe4036925..d1f295fcf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -17,7 +17,7 @@ public class QueryInformationSystem { private ISLookUpService isLookUp; - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + " and ($x//context/param[./@name = 'status']/text() = 'all') " @@ -28,9 +28,22 @@ public class QueryInformationSystem { "{$x//CONFIGURATION/context/@label}" + ""; - public CommunityMap getCommunityMap() + private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + + + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + + " and $x//CONFIGURATION/context[./@id=%s] " + + + " return " + + " " + + "{$x//CONFIGURATION/context/@id}" + + "{$x//CONFIGURATION/context/@label}" + + ""; + + public CommunityMap getCommunityMap(boolean singleCommunity, String community_id) throws ISLookUpException, DocumentException { - return getMap(isLookUp.quickSearchProfile(XQUERY)); + if (singleCommunity) + return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + community_id + "'"))); + return getMap(isLookUp.quickSearchProfile(XQUERY_ALL)); } From 970b387b8dfa34592bbbf2f62c8ec1eae27d272e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:08:10 +0200 Subject: [PATCH 038/287] modification to allow dump of a single community --- .../dhp/oa/graph/dump/SaveCommunityMap.java | 21 ++++++++++++------- .../oa/graph/dump/input_cm_parameters.json | 14 ++++++++++++- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6ac626518..7db19f771 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -1,4 +1,3 @@ - package eu.dnetlib.dhp.oa.graph.dump; import java.io.BufferedWriter; @@ -6,6 +5,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -54,10 +54,10 @@ public class SaveCommunityMap implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils - .toString( - SaveCommunityMap.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); + .toString( + SaveCommunityMap.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -71,14 +71,19 @@ public class SaveCommunityMap implements Serializable { final String isLookUpUrl = parser.get("isLookUpUrl"); log.info("isLookUpUrl: {}", isLookUpUrl); + final Boolean singleCommunity = Optional.ofNullable(parser.get("singleDeposition")) + .map(Boolean::valueOf).orElse(false); + + final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); + final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); - scm.saveCommunityMap(); + scm.saveCommunityMap(singleCommunity, community_id); } - private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException { - writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap())); + private void saveCommunityMap(boolean singleCommunity, String community_id) throws ISLookUpException, IOException, DocumentException { + writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); writer.close(); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json index 6e42bfa64..806220510 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json @@ -18,7 +18,19 @@ "paramLongName": "outputPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true - } + }, + { + "paramName": "sd", + "paramLongName": "singleDeposition", + "paramDescription": "true if the dump should be created for a single community", + "paramRequired": true + }, + { + "paramName": "ci", + "paramLongName": "communityId", + "paramDescription": "the id of the community for which to create the dump", + "paramRequired": true + } } ] From 52ce35d57b7c3e4b28996f2147182cf93099f969 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:08:46 +0200 Subject: [PATCH 039/287] - --- .../eu/dnetlib/dhp/oa/graph/dump/input_parameters.json | 7 ++++++- .../eu/dnetlib/dhp/oa/graph/dump/split_parameters.json | 7 ++++++- .../eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json | 7 +------ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json index 1d986fc26..e86f6e3d2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json @@ -35,7 +35,12 @@ "paramLongName":"dumpType", "paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project", "paramRequired": false - } + }, { + "paramName":"cid", + "paramLongName":"communityId", + "paramDescription": "the id of the community to be dumped", + "paramRequired": false +} ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json index 29812188a..dec82bc97 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json @@ -25,7 +25,12 @@ "paramLongName": "isSparkSessionManaged", "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false - } + }, { + "paramName":"cid", + "paramLongName":"communityId", + "paramDescription": "the id of the community to be dumped", + "paramRequired": false +} ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json index 683b6f4b7..b02062ede 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json @@ -12,12 +12,7 @@ "paramDescription": "The id of the concept record for a new version", "paramRequired": false }, - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": false - }, + { "paramName":"di", "paramLongName":"depositionId", From 320cf02d967f86aa7f2971d5bb81aef714621d13 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:13:32 +0200 Subject: [PATCH 040/287] Changed the way to find results linked to projects. We verify to actually have the project on the graph before selecting the result --- .../SparkResultLinkedToProject.java | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 1a28a21f4..9c4c73d85 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -24,7 +24,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; - +import org.apache.spark.sql.*; +import eu.dnetlib.dhp.schema.oaf.Project; /** * Selects the results linked to projects. Only for these results the dump will be performed. * The code to perform the dump and to expend the dumped results with the informaiton related to projects @@ -59,8 +60,8 @@ public class SparkResultLinkedToProject implements Serializable { final String resultClassName = parser.get("resultTableName"); log.info("resultTableName: {}", resultClassName); - final String relationPath = parser.get("relationPath"); - log.info("relationPath: {}", relationPath); + final String graphPath = parser.get("graphPath"); + log.info("graphPath: {}", graphPath); Class inputClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -70,34 +71,32 @@ public class SparkResultLinkedToProject implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, relationPath); + writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, graphPath); }); } private static void writeResultsLinkedToProjects(SparkSession spark, Class inputClazz, - String inputPath, String outputPath, String relationPath) { + String inputPath, String outputPath, String graphPath) { Dataset results = Utils .readPath(spark, inputPath, inputClazz) .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); Dataset relations = Utils - .readPath(spark, relationPath, Relation.class) + .readPath(spark, graphPath + "/relation", Relation.class) .filter( "dataInfo.deletedbyinference = false and lower(relClass) = '" + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); - relations - .joinWith( - results, relations.col("source").equalTo(results.col("id")), - "inner") - .groupByKey( - (MapFunction, String>) value -> value - ._2() - .getId(), - Encoders.STRING()) - .mapGroups((MapGroupsFunction, R>) (k, it) -> { - return it.next()._2(); - }, Encoders.bean(inputClazz)) + spark + .sql( + "Select res.* " + + "from relation rel " + + "join result res " + + "on rel.source = res.id " + + "join project p " + + "on rel.target = p.id " + + "") + .as(Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") From 886617afd0243075836815cb6cef59e9e2c096ae Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:15:35 +0200 Subject: [PATCH 041/287] One result linked to more than on project is saved just once --- .../graph/dump/funderresults/SparkResultLinkedToProject.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 9c4c73d85..e0355d6d6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -97,6 +97,11 @@ public class SparkResultLinkedToProject implements Serializable { "on rel.target = p.id " + "") .as(Encoders.bean(inputClazz)) + .groupByKey( + (MapFunction< R, String>) value -> value + .getId(), + Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") From 774cdb190e498da204f162d1497f27dd68fb880a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 13 Jul 2021 18:57:24 +0200 Subject: [PATCH 042/287] changes to mirror the last dump of the graph with the ols data model. --- .../dnetlib/doiboost/mag/MAGMappingTest.scala | 24 +++++-- .../dhp/oa/graph/dump/DumpProducts.java | 3 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 12 ++-- .../graph/dump/community/CommunitySplit.java | 10 +-- .../funderresults/SparkDumpFunderResults.java | 17 +++-- .../dump/complete/schema/result_schema.json | 70 ++++++++++++++++++- .../dump/QueryInformationSystemTest.java | 2 +- .../dump/complete/CreateRelationTest.java | 2 +- .../dump/funderresult/SplitPerFunderTest.java | 4 +- 9 files changed, 113 insertions(+), 31 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 7eb50665e..d43e7ed37 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -70,9 +70,15 @@ class MAGMappingTest { implicit val formats = DefaultFormats - val conf = new SparkConf().setAppName("test").setMaster("local[2]") - val sc = new SparkContext(conf) - val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + + val conf = new SparkConf().setAppName("test").setMaster("local[*]").set("spark.driver.host", "localhost") + + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() val path = getClass.getResource("magPapers.json").getPath import org.apache.spark.sql.Encoders @@ -95,9 +101,15 @@ class MAGMappingTest { implicit val formats = DefaultFormats - val conf = new SparkConf().setAppName("test").setMaster("local[2]") - val sc = new SparkContext(conf) - val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + + val conf = new SparkConf().setAppName("test").setMaster("local[*]").set("spark.driver.host", "localhost") + + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() val path = getClass.getResource("duplicatedMagPapers.json").getPath import org.apache.spark.sql.Encoders diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index f5b8f1c76..4ddcea9e8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -37,7 +37,8 @@ public class DumpProducts implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType); + execDump( + spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType); }); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index fd8262544..28881b253 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -48,14 +48,14 @@ public class SendToZenodoHDFS implements Serializable { .orElse(false); final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null); - final String communityMapPath = parser.get("communityMapPath"); + //final String communityMapPath = parser.get("communityMapPath"); Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); - CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath); + //CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath); RemoteIterator fileStatusListIterator = fileSystem .listFiles( @@ -87,10 +87,10 @@ public class SendToZenodoHDFS implements Serializable { if (!p_string.endsWith("_SUCCESS")) { // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); String name = p_string.substring(p_string.lastIndexOf("/") + 1); - log.info("Sending information for community: " + name); - if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { - name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar"; - } +// log.info("Sending information for community: " + name); +// if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { +// name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar"; +// } FSDataInputStream inputStream = fileSystem.open(p); zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index ec16a65e0..d64c26503 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -34,12 +34,13 @@ public class CommunitySplit implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet()); + CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); + execSplit(spark, inputPath, outputPath, communityMap); }); } private static void execSplit(SparkSession spark, String inputPath, String outputPath, - Set communities) { + CommunityMap communities) { Dataset result = Utils .readPath(spark, inputPath + "/publication", CommunityResult.class) @@ -48,8 +49,9 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities + .keySet() .stream() - .forEach(c -> printResult(c, result, outputPath)); + .forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_"))); } @@ -61,7 +63,7 @@ public class CommunitySplit implements Serializable { .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); + .json(outputPath); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index b124b76ce..9fcdcaf78 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -103,12 +103,13 @@ public class SparkDumpFunderResults implements Serializable { } else { funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase(); } - writeFunderResult(funder, result, outputPath , funderdump); + writeFunderResult(funder, result, outputPath, funderdump); }); } - private static void dumpResults(String nsp, Dataset results, String outputPath, String funderName) { + private static void dumpResults(String nsp, Dataset results, String outputPath, + String funderName) { results.map((MapFunction) r -> { if (!Optional.ofNullable(r.getProjects()).isPresent()) { @@ -128,24 +129,22 @@ public class SparkDumpFunderResults implements Serializable { } return null; }, Encoders.bean(CommunityResult.class)) - .filter((FilterFunction) r -> r!= null) + .filter((FilterFunction) r -> r != null) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + "/" + funderName); } - private static void writeFunderResult(String funder, Dataset results, String outputPath, - String funderDump) { + String funderDump) { if (funder.startsWith("40|irb")) { - dumpResults(funder, results, outputPath, "HRZZ"); - dumpResults(funder, results, outputPath, "MZOS"); + dumpResults(funder, results, outputPath, "HRZZ"); + dumpResults(funder, results, outputPath, "MZOS"); } else - dumpResults(funder, results, outputPath, funderDump); + dumpResults(funder, results, outputPath, funderDump); } - } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json index 867fd5a77..03cbfb074 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json @@ -1,6 +1,23 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { + "AccessRight":{ + "type":"object", + "properties":{ + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + } + }, "ControlledField": { "type": "object", "properties": { @@ -266,6 +283,57 @@ ] } }, + "instance":{ + "type":"array", + "items":{ + "type":"object", + "properties":{ + "accessright":{ + "allOf":[ + { + "$ref":"#/definitions/AccessRight" + }, + { + "description":"The accessright of this materialization of the result" + } + ] + }, + "articleprocessingcharge":{ + "type":"object", + "properties":{ + "amount":{ + "type":"string" + }, + "currency":{ + "type":"string" + } + } + }, + "license":{ + "type":"string" + }, + "publicationdate":{ + "type":"string" + }, + "refereed":{ + "type":"string" + }, + "type":{ + "type":"string", + "description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" + }, + "url":{ + "description":"Description of url", + "type":"array", + "items":{ + "type":"string", + "description":"urls where it is possible to access the materialization of the result" + } + } + }, + "description":"One of the materialization for this result" + } + }, "programmingLanguage": { "type": "string", "description": "Only for results with type 'software': the programming language" @@ -302,7 +370,7 @@ "subject": { "allOf": [ {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}, + {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} ] } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index c6666342a..499df3a09 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -70,7 +70,7 @@ public class QueryInformationSystemTest { lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(isLookUpService); - map = queryInformationSystem.getCommunityMap(); + map = queryInformationSystem.getCommunityMap(false, null); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index 5f21e0bc5..448034d0c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -379,7 +379,7 @@ public class CreateRelationTest { " \n" + " 675858\n" + " \n" + - " H2020-EINFRA-2015-1\n" + + " EC | H2020 | RIA\n" + " EC\n" + " West-Life\n" + " \n" + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index eefb0b9cb..26187f0a3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -139,8 +139,8 @@ public class SplitPerFunderTest { // MZOS 1 tmp = sc - .textFile(workingDir.toString() + "/split/MZOS") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + .textFile(workingDir.toString() + "/split/MZOS") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); Assertions.assertEquals(1, tmp.count()); } From 4cb65bc64af4d4cf0b76ea3cc6ca9e67d200af83 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 14 Jul 2021 09:44:32 +0200 Subject: [PATCH 043/287] fixed process doiboost workflow: - splitted OrcidToOAF into two phase preprocess and process - updated workflow used in production --- .../crossref/SparkMapDumpIntoOAF.scala | 2 +- .../orcid/SparkConvertORCIDToOAF.scala | 56 +++------------ .../doiboost/orcid/SparkPreprocessORCID.scala | 70 +++++++++++++++++++ .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 2 +- .../convert_crossref_dump_to_oaf_params.json | 6 ++ .../doiboost/convert_orcid_to_oaf_params.json | 6 ++ .../doiboost/convert_uw_to_oaf_params.json | 6 ++ .../dhp/doiboost/oozie_app/workflow.xml | 2 +- .../doiboost/orcid_oaf/oozie_app/workflow.xml | 2 +- ...rams.json => preprocess_orcid_params.json} | 3 +- .../orcid/MappingORCIDToOAFTest.scala | 2 +- 11 files changed, 103 insertions(+), 54 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{convert_map_to_oaf_params.json => preprocess_orcid_params.json} (59%) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index 57acaf404..c65916610 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -21,7 +21,7 @@ object SparkMapDumpIntoOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index a359eb3c6..9117bcb34 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -1,61 +1,18 @@ package eu.dnetlib.doiboost.orcid -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def fixORCIDItem(item :ORCIDItem):ORCIDItem = { - new ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) - - } - - - def run(spark:SparkSession,sourcePath:String,workingPath:String, targetPath:String):Unit = { - import spark.implicits._ - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) - - spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) - - spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] - - works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ - val doi = i._1.doi - var author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) - .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] - .map(s => fixORCIDItem(s)) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") - - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] - - logger.info("Converting ORCID to OAF") - dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) - } - def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -64,11 +21,16 @@ object SparkConvertORCIDToOAF { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + import spark.implicits._ - val sourcePath = parser.get("sourcePath") val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark, sourcePath, workingPath, targetPath) + + val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + + logger.info("Converting ORCID to OAF") + dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala new file mode 100644 index 000000000..d6911cfa7 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -0,0 +1,70 @@ +package eu.dnetlib.doiboost.orcid + +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.dhp.schema.orcid.OrcidDOI +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +object SparkPreprocessORCID { + val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + + def fixORCIDItem(item :ORCIDItem):ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + + } + + + def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { + import spark.implicits._ + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + + val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) + + spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") + + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) + + spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") + + val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + + val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + + works.joinWith(authors, authors("oid").equalTo(works("oid"))) + .map(i =>{ + val doi = i._1.doi + val author = i._2 + (doi, author) + }).groupBy(col("_1").alias("doi")) + .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] + .map(s => fixORCIDItem(s)) + .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") + } + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + + run(spark, sourcePath, workingPath) + + } + +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index a72e4b0d6..4530926f1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -18,7 +18,7 @@ object SparkMapUnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json new file mode 100644 index 000000000..da324f8c4 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source path ", "paramRequired": false}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json new file mode 100644 index 000000000..6c9ca5ede --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": false}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json new file mode 100644 index 000000000..da324f8c4 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source path ", "paramRequired": false}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml index fa47e142d..34b4b5c5e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml @@ -368,7 +368,7 @@ yarn-cluster cluster Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml index bffde793b..0670e18de 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml @@ -34,7 +34,7 @@ yarn-cluster cluster Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json similarity index 59% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json index 152103570..fdc1e2f20 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json @@ -1,7 +1,6 @@ [ {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the Orcid Input file", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": false}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true}, - {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index d620c4ef3..5fced8edb 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -46,7 +46,7 @@ class MappingORCIDToOAFTest { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - SparkConvertORCIDToOAF.run( spark,sourcePath, workingPath, targetPath) + SparkPreprocessORCID.run( spark,sourcePath, workingPath) val mapper = new ObjectMapper() From 1cdd09cd8ed069c2d2dc2d5856faf154c15a3558 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:14:59 +0200 Subject: [PATCH 044/287] Tentative fix for testing of Jenkins --- .../dnetlib/doiboost/mag/MAGMappingTest.scala | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 7eb50665e..dbdedcbd0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -70,9 +70,15 @@ class MAGMappingTest { implicit val formats = DefaultFormats - val conf = new SparkConf().setAppName("test").setMaster("local[2]") - val sc = new SparkContext(conf) - val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() val path = getClass.getResource("magPapers.json").getPath import org.apache.spark.sql.Encoders @@ -95,9 +101,15 @@ class MAGMappingTest { implicit val formats = DefaultFormats - val conf = new SparkConf().setAppName("test").setMaster("local[2]") - val sc = new SparkContext(conf) - val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() val path = getClass.getResource("duplicatedMagPapers.json").getPath import org.apache.spark.sql.Encoders From 441701c85c4e41c50bbdd49334234a28ff554c01 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:36:30 +0200 Subject: [PATCH 045/287] DoiBoost AccessRigh #4362 - If multiple licenses are available, take the one applied to 'vor' --- .../doiboost/crossref/Crossref2Oaf.scala | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index fb96717d9..3f2f63fe6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -168,12 +168,22 @@ case object Crossref2Oaf { // Mapping instance val instance = new Instance() val license = for { - JString(lic) <- json \ "license" \ "URL" - } yield asField(lic) - val l = license.filter(d => StringUtils.isNotBlank(d.getValue)) - if (l.nonEmpty) - instance.setLicense(l.head) - + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license + JField("content-version", JString(content_version)) <- license + } yield (asField(lic), content_version) + val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) + if (l.nonEmpty){ + if (l exists (d => d._2.equals("vor"))){ + for(d <- l){ + if (d._2.equals("vor")){ + instance.setLicense(d._1) + } + } + } + else{ + instance.setLicense(l.head._1)} + } // Ticket #6281 added pid to Instance instance.setPid(result.getPid) From 981b1018f6459b73205e0c2a90fbcf3fc72c52e8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:43:00 +0200 Subject: [PATCH 046/287] DoiBoost AccessRigh #4362 - decide access right according to licence. Default access right is Unknown --- .../doiboost/DoiBoostMappingUtil.scala | 65 ++++++++++++++++++- .../doiboost/crossref/Crossref2Oaf.scala | 4 +- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index c0939fec1..2558e1c67 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -1,12 +1,16 @@ package eu.dnetlib.doiboost +import java.time.LocalDate +import java.time.format.DateTimeFormatter + import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -118,6 +122,51 @@ object DoiBoostMappingUtil { } + def decideAccessRight(lic : Field[String], date:String) : AccessRight = { + if(lic == null){ + //Default value Unknown + return getUnknownQualifier() + } + val license : String = lic.getValue + //CC licenses + if(license.startsWith("cc") || + license.startsWith("http://creativecommons.org/licenses") || + license.startsWith("https://creativecommons.org/licenses") || + + //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || + + //APA (considered OPEN also by Unpaywall) + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")){ + + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + + //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) + if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ + val now = java.time.LocalDate.now + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + + val pub_date = LocalDate.parse(date, formatter) + + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + } + + return getClosedAccessQualifier() + + } + def getOpenAccessQualifier():AccessRight = { @@ -129,6 +178,20 @@ object DoiBoostMappingUtil { } + def getUnknownQualifier():AccessRight = { + OafMapperUtils.accessRight("UNKNOWN","not available",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + + def getEmbargoedAccessQualifier():AccessRight = { + OafMapperUtils.accessRight("EMBARGO","Embargo",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + def getClosedAccessQualifier():AccessRight = { + OafMapperUtils.accessRight("CLOSED","Closed Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + def extractInstance(r:Result):Option[Instance] = { r.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 3f2f63fe6..25f0ff381 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,7 +4,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats @@ -195,7 +195,7 @@ case object Crossref2Oaf { OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS)) } - instance.setAccessright(getRestrictedQualifier()) + instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)) instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) From 6222adf17609ffe956be0bf6df2976363f182d3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:46:31 +0200 Subject: [PATCH 047/287] DoiBoost AccessRigh #4362 - added resources and test for crossref mapping (licence part included) --- .../crossref/CrossrefMappingTest.scala | 97 ++ .../crossref/publication_license_embargo.json | 1537 +++++++++++++++++ .../publication_license_embargo_open.json | 1537 +++++++++++++++++ .../crossref/publication_license_open.json | 1537 +++++++++++++++++ .../crossref/publication_license_vor.json | 1537 +++++++++++++++++ 5 files changed, 6245 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 0fa34d88e..63555bcbd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -492,6 +492,103 @@ class CrossrefMappingTest { } + @Test + def testLicenseVorClosed() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_vor.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + + + + + } + + @Test + def testLicenseOpen() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_open.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testLicenseEmbargoOpen() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_open.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testLicenseEmbargo() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json new file mode 100644 index 000000000..47ca55f34 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json new file mode 100644 index 000000000..e667f3c7f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2020, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json new file mode 100644 index 000000000..225a36b1f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json new file mode 100644 index 000000000..f2e91a23f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://www.springer.com/vor", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file From f4f7c6f9d34ac050e60bf35b42518fdb2082b280 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 13:52:52 +0200 Subject: [PATCH 048/287] DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is --- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index cc758bcae..0a5ba063f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -11,6 +11,7 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color @@ -23,6 +24,21 @@ case class OALocation(evidence:Option[String], host_type:Option[String], is_best object UnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(getClass) + + def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { + if(input.equalsIgnoreCase("close")) + return None + if(input.equalsIgnoreCase("green")) + return Some(OpenAccessRoute.green) + if(input.equalsIgnoreCase("bronze")) + return Some(OpenAccessRoute.bronze) + if(input.equalsIgnoreCase("hybrid")) + return Some(OpenAccessRoute.hybrid) + else + return Some(OpenAccessRoute.gold) + + } + def get_color(is_oa:Boolean, location: OALocation, journal_is_oa:Boolean):Option[OpenAccessRoute] = { if (is_oa) { if (location.host_type.isDefined) { @@ -65,7 +81,7 @@ object UnpayWallToOAF { val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) - val colour = get_color(is_oa, oaLocation, journal_is_oa) + val colour = get_unpaywall_color((json \ "oa_status").extractOrElse[String](null)) pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava) pub.setDataInfo(generateDataInfo()) From 09ad7b2a9e3bc6c64c12d345da56aaccaf3b1fa1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 13:58:05 +0200 Subject: [PATCH 049/287] DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is --- .../src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index 0a5ba063f..c8324cde1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -26,7 +26,7 @@ object UnpayWallToOAF { def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { - if(input.equalsIgnoreCase("close")) + if(input == null || input.equalsIgnoreCase("close")) return None if(input.equalsIgnoreCase("green")) return Some(OpenAccessRoute.green) From 10068c00ea1dc757262b4bbe2307fcb1b0e3fd30 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 14 Jul 2021 14:37:06 +0200 Subject: [PATCH 050/287] Code refactor: - removed old workflows in doiboost - splitted workflow of doiboost in preprocess and process --- .../orcid/SparkConvertORCIDToOAF.scala | 17 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 2 +- .../doiboost/crossref/oozie_app/workflow.xml | 101 ---- .../intersection/oozie_app/config-default.xml | 38 -- .../intersection/oozie_app/workflow.xml | 96 ---- .../doiboost/mag/oozie_app/config-default.xml | 42 -- .../dhp/doiboost/mag/oozie_app/workflow.xml | 92 ---- .../dhp/doiboost/oozie_app/config-default.xml | 42 -- .../dhp/doiboost/oozie_app/workflow.xml | 453 ------------------ .../oozie_app/config-default.xml | 0 .../preprocess/oozie_app/workflow.xml | 216 +++++++++ .../unpaywall/oozie_app/config-default.xml | 38 -- .../doiboost/unpaywall/oozie_app/workflow.xml | 55 --- .../dnetlib/doiboost/mag/MAGMappingTest.scala | 25 +- .../orcid/MappingORCIDToOAFTest.scala | 4 + 15 files changed, 240 insertions(+), 981 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{crossref => preprocess}/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 9117bcb34..fa4a93e00 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -10,6 +10,16 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + + def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + import spark.implicits._ + val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + + logger.info("Converting ORCID to OAF") + dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) + } + def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) @@ -21,16 +31,11 @@ object SparkConvertORCIDToOAF { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - import spark.implicits._ val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] - - logger.info("Converting ORCID to OAF") - dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) + run(spark,workingPath, targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index d6911cfa7..31f331912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -50,7 +50,7 @@ object SparkPreprocessORCID { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml deleted file mode 100644 index 63c2e9ef2..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - workingPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - timestamp - Timestamp for incremental Harvesting - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.CrossrefImporter - -t${workingPath}/input/crossref/index_update - -n${nameNode} - -ts${timestamp} - - - - - - - - yarn-cluster - cluster - ExtractCrossrefToOAF - eu.dnetlib.doiboost.crossref.CrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --workingPath/data/doiboost/input/crossref - --masteryarn-cluster - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - ConvertCrossrefToOAF - eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${workingPath}/input/crossref/crossref_ds - --targetPath${workingPath}/process/ - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml deleted file mode 100644 index cf617a84c..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml deleted file mode 100644 index dcde62c9d..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml +++ /dev/null @@ -1,96 +0,0 @@ - - - - hostedByMapPath - the Hosted By Map Path - - - affiliationPath - the Affliation Path - - - paperAffiliationPath - the paperAffiliation Path - - - workingDirPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - yarn-cluster - cluster - Create DOIBoost Infospace - eu.dnetlib.doiboost.SparkGenerateDoiBoost - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --hostedByMapPath${hostedByMapPath} - --affiliationPath${affiliationPath} - --paperAffiliationPath${paperAffiliationPath} - --workingDirPath${workingDirPath} - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Generate DOIBoost ActionSet - eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --dbPublicationPath${workingDirPath}/doiBoostPublicationFiltered - --dbDatasetPath${workingDirPath}/crossrefDataset - --crossRefRelation${workingDirPath}/crossrefRelation - --dbaffiliationRelationPath${workingDirPath}/doiBoostPublicationAffiliation - -do${workingDirPath}/doiBoostOrganization - --targetPath${workingDirPath}/actionDataSet - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml deleted file mode 100644 index 59e5c059f..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.wf.rerun.failnodes - false - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml deleted file mode 100644 index 9d19dddc7..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - sourcePath - the working dir base path - - - targetPath - the working dir base path - - - workingPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - ${sparkExtraOPT} - - --sourcePath${sourcePath} - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - - - yarn-cluster - cluster - Convert Mag to OAF Dataset - eu.dnetlib.doiboost.mag.SparkPreProcessMAG - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${workingPath} - --workingPath${workingPath}/process - --targetPath${targetPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml deleted file mode 100644 index 508202e30..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml deleted file mode 100644 index 34b4b5c5e..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ /dev/null @@ -1,453 +0,0 @@ - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorIntersectionMemory - memory for individual executor - - - - sparkExecutorCores - number of cores used by single executor - - - - - - workingPath - the working Path - - - - hostedByMapPath - the hostedByMap Path - - - outputPath - the Path of the sequence file action set - - - - - - inputPathCrossref - the Crossref input path - - - crossrefDumpPath - the Crossref dump path - - - - - - - - - - - - - - - - - MAGDumpPath - the MAG dump working path - - - - inputPathMAG - the MAG working path - - - - - - inputPathUnpayWall - the UnpayWall working path - - - - - inputPathOrcid - the ORCID input path - - - - workingPathOrcid - the ORCID working path - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - ${wf:conf('resumeFrom') eq 'ConvertCrossrefToOAF'} - ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} - ${wf:conf('resumeFrom') eq 'PreprocessMag'} - ${wf:conf('resumeFrom') eq 'PreprocessUW'} - ${wf:conf('resumeFrom') eq 'PreprocessORCID'} - ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} - ${wf:conf('resumeFrom') eq 'GenerateActionSet'} - ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=7G - --executor-cores=2 - --driver-memory=7G - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/crossref_unpack/ - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${MAGDumpPath} - --targetPath${inputPathMAG}/dataset - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - ConvertCrossrefToOAF - eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - yarn-cluster - cluster - Convert Mag to OAF Dataset - eu.dnetlib.doiboost.mag.SparkProcessMAG - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorIntersectionMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathMAG}/dataset - --workingPath${inputPathMAG}/process - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - - - yarn-cluster - cluster - Convert UnpayWall to Dataset - eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathUnpayWall}/uw_extracted - --targetPath${workingPath}/uwPublication - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkPreprocessORCID - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${inputPathOrcid} - --workingPath${workingPathOrcid} - --targetPath${workingPath}/orcidPublication - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Create DOIBoost Infospace - eu.dnetlib.doiboost.SparkGenerateDoiBoost - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorIntersectionMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --hostedByMapPath${hostedByMapPath} - --affiliationPath${inputPathMAG}/dataset/Affiliations - --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations - --workingPath${workingPath} - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Generate DOIBoost ActionSet - eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --dbPublicationPath${workingPath}/doiBoostPublicationFiltered - --dbDatasetPath${workingPath}/crossrefDataset - --crossRefRelation${workingPath}/crossrefRelation - --dbaffiliationRelationPath${workingPath}/doiBoostPublicationAffiliation - --dbOrganizationPath${workingPath}/doiBoostOrganization - --targetPath${workingPath}/actionDataSet - --sFilePath${outputPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/config-default.xml rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml new file mode 100644 index 000000000..03f7b7566 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -0,0 +1,216 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + inputPathCrossref + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + + + MAGDumpPath + the MAG dump working path + + + + inputPathMAG + the MAG working path + + + + + + inputPathOrcid + the ORCID input path + + + + workingPathOrcid + the ORCID working path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} + ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} + ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} + ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} + ${wf:conf('resumeFrom') eq 'PreProcessORCID'} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + yarn-cluster + cluster + SparkGenerateCrossrefDataset + eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=7G + --executor-cores=2 + --driver-memory=7G + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/crossref_unpack/ + --targetPath${inputPathCrossref}/crossref_ds + + + + + + + + + + + + + + + + + + + + + + + + + + + + yarn-cluster + cluster + Convert Mag to Dataset + eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${MAGDumpPath} + --targetPath${inputPathMAG}/dataset + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Convert ORCID to Dataset + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathOrcid} + --workingPath${workingPathOrcid} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml deleted file mode 100644 index cf617a84c..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml deleted file mode 100644 index d2a69752e..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml +++ /dev/null @@ -1,55 +0,0 @@ - - - - sourcePath - the working dir base path - - - targetPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn-cluster - cluster - Convert UnpayWall to Dataset - eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${sourcePath}/uw_extracted - --targetPath${targetPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index dbdedcbd0..46d4ec08d 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -1,22 +1,15 @@ package eu.dnetlib.doiboost.mag -import java.sql.Timestamp - -import eu.dnetlib.dhp.schema.oaf.Publication -import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature -import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.api.java.function.MapFunction -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} -import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.codehaus.jackson.map.ObjectMapper import org.junit.jupiter.api.Assertions._ -import org.apache.spark.sql.functions._ +import org.junit.jupiter.api.Test +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ +import java.sql.Timestamp import scala.io.Source -import scala.reflect.ClassTag -import scala.util.matching.Regex @@ -65,8 +58,7 @@ class MAGMappingTest { @Test def normalizeDoiTest():Unit = { - import org.json4s.jackson.Serialization.write - import org.json4s.DefaultFormats + implicit val formats = DefaultFormats @@ -96,7 +88,6 @@ class MAGMappingTest { @Test def normalizeDoiTest2():Unit = { - import org.json4s.jackson.Serialization.write import org.json4s.DefaultFormats implicit val formats = DefaultFormats diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index 5fced8edb..b484dc087 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -48,6 +48,8 @@ class MappingORCIDToOAFTest { SparkPreprocessORCID.run( spark,sourcePath, workingPath) + SparkConvertORCIDToOAF.run(spark, workingPath,targetPath) + val mapper = new ObjectMapper() @@ -62,6 +64,8 @@ class MappingORCIDToOAFTest { println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p.first())) spark.close() + + } From a65667d2175d2b6a83a04462699d74b7aee13305 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Wed, 14 Jul 2021 15:07:07 +0200 Subject: [PATCH 051/287] added publication to dataset even if no contributors --- .../SparkGenEnrichedOrcidWorks.java | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 5bcec7224..ca39f99cc 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -32,10 +33,7 @@ import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; -import eu.dnetlib.dhp.schema.orcid.Work; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; +import eu.dnetlib.dhp.schema.orcid.*; import eu.dnetlib.doiboost.orcid.json.JsonHelper; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; @@ -111,6 +109,10 @@ public class SparkGenEnrichedOrcidWorks { Encoders.bean(WorkDetail.class)); logger.info("Works data loaded: " + workDataset.count()); + final LongAccumulator warnNotFoundContributors = spark + .sparkContext() + .longAccumulator("warnNotFoundContributors"); + JavaRDD> enrichedWorksRDD = workDataset .joinWith( authorDataset, @@ -119,7 +121,21 @@ public class SparkGenEnrichedOrcidWorks { (MapFunction, Tuple2>) value -> { WorkDetail w = value._1; AuthorData a = value._2; - AuthorMatcher.match(a, w.getContributors()); + if (w.getContributors() == null + || (w.getContributors() != null && w.getContributors().size() == 0)) { + Contributor c = new Contributor(); + c.setName(a.getName()); + c.setSurname(a.getSurname()); + c.setCreditName(a.getCreditName()); + c.setOid(a.getOid()); + List contributors = Arrays.asList(c); + w.setContributors(contributors); + if (warnNotFoundContributors != null) { + warnNotFoundContributors.add(1); + } + } else { + AuthorMatcher.match(a, w.getContributors()); + } return new Tuple2<>(a.getOid(), JsonHelper.createOidWork(w)); }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())) @@ -180,6 +196,7 @@ public class SparkGenEnrichedOrcidWorks { logger.info("parsedPublications: " + parsedPublications.value().toString()); logger.info("enrichedPublications: " + enrichedPublications.value().toString()); + logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); logger.info("errorsGeneric: " + errorsGeneric.value().toString()); logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); From 66604bb2b4fe045858b1d54ac3fc2df93bbbdeba Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Wed, 14 Jul 2021 16:44:51 +0200 Subject: [PATCH 052/287] added absolute path to process folder --- .../doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 2 +- .../dhp/doiboost/orcidnodoi/oozie_app/workflow.xml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index c342d2e79..1d47808ef 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -188,7 +188,7 @@ public class SparkGenEnrichedOrcidWorks { OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( - workingPath.concat(outputEnrichedWorksPath), + outputEnrichedWorksPath, Text.class, Text.class, SequenceFileOutputFormat.class, diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index f7ac04821..05492d50d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -11,7 +11,7 @@ path where to store the action set - processOutputFolder + processOutputPath process_no_doi_dataset_prod temporary path where to store the action set @@ -71,7 +71,7 @@ - + @@ -97,7 +97,7 @@ --workingPath${workingPath}/ --hdfsServerUri${nameNode} --orcidDataFolderlast_orcid_dataset - --outputEnrichedWorksPath${processOutputFolder} + --outputEnrichedWorksPath${processOutputPath} @@ -105,7 +105,7 @@ - ${workingPath}/${processOutputFolder}/* + ${processOutputPath}/* ${outputPath} From 2dc50c0999738eb3051fb8570f3e3a0b4bb81c7e Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Wed, 14 Jul 2021 17:02:22 +0200 Subject: [PATCH 053/287] added default value to process path --- .../eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 05492d50d..04ca05af2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -12,7 +12,7 @@ processOutputPath - process_no_doi_dataset_prod + /data/orcid_activities_2020/process_no_doi_dataset_prod temporary path where to store the action set From 34506df1b607241f72f699eafeb4472bc6f11d27 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 10:29:51 +0200 Subject: [PATCH 054/287] DoiBoost AccessRigh #4362 - if the journal is open, the OPEN access right is set to all instances and color is GOLD (overwrite if the color was already set in one of the previous steps) --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 2558e1c67..6a2f0e45f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -213,8 +213,10 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) - if (item.openAccess) + if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) + i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) + } val ar = getOpenAccessQualifier() publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) } From acd60563309629d6d349f0aa0bdf92a7bf44b8a4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 12:47:10 +0200 Subject: [PATCH 055/287] added shell action to automatically download the new dump and put it in a specified hdfs location --- .../doiboost/preprocess/oozie_app/download.sh | 2 ++ .../preprocess/oozie_app/workflow.xml | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh new file mode 100644 index 000000000..98984e249 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!bin/bash +curl -LSs $1 | hdfs dfs -put - $2$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 03f7b7566..d63e54b8d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,12 +63,14 @@ + ${wf:conf('resumeFrom') eq 'Skip'} + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -76,6 +78,27 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + download.sh + + + + + + ${jobTracker} From c4b18e6ccb2946d9de0923b5d5bab5521907cae6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 15:01:25 +0200 Subject: [PATCH 056/287] changed the download.sh, added skip step to allow to not execute one phase and changed the workflow sequence of steps --- .../dhp/doiboost/preprocess/oozie_app/download.sh | 2 +- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 +++++++------ .../dhp/doiboost/process/oozie_app/workflow.xml | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh index 98984e249..dfb0db708 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -1,2 +1,2 @@ #!bin/bash -curl -LSs $1 | hdfs dfs -put - $2$3 \ No newline at end of file +curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index d63e54b8d..a1b8804fa 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -70,7 +70,7 @@ ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -78,6 +78,7 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + ${jobTracker} @@ -105,7 +106,7 @@ ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} --outputPath${crossrefDumpPath}/files/ @@ -161,16 +162,16 @@ --targetPath${inputPathCrossref}/crossref_ds - + - - + + - + diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f845d97f3..e75e1d8e1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -75,6 +75,7 @@ + ${wf:conf('resumeFrom') eq 'Skip'} ${wf:conf('resumeFrom') eq 'PreprocessMag'} ${wf:conf('resumeFrom') eq 'PreprocessUW'} ${wf:conf('resumeFrom') eq 'ProcessORCID'} From 199123b74b3a988a2cadf5bec843998e734d494b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 17:30:27 +0200 Subject: [PATCH 057/287] DoiBoost AccessRigh #4362 - Fixed issue on date formatting. Added test method and associated resource --- .../doiboost/DoiBoostMappingUtil.scala | 50 +- .../crossref/CrossrefMappingTest.scala | 21 + .../publication_license_embargo_datetime.json | 1538 +++++++++++++++++ 3 files changed, 1600 insertions(+), 9 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 6a2f0e45f..b0c6e009b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -149,18 +149,50 @@ object DoiBoostMappingUtil { //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ val now = java.time.LocalDate.now - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - val pub_date = LocalDate.parse(date, formatter) + try{ + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + }catch { + case e: Exception => { + try{ + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + }catch{ + case ex: Exception => return getClosedAccessQualifier() + } + } - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() - oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) - return oaq - } - else{ - return getEmbargoedAccessQualifier() } + + //val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + + + + // val pub_date = LocalDate.parse(date, formatter) + +// if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ +// val oaq : AccessRight = getOpenAccessQualifier() +// oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) +// return oaq +// } +// else{ +// return getEmbargoedAccessQualifier() +// } } return getClosedAccessQualifier() diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 63555bcbd..75fb3f787 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -589,6 +589,27 @@ class CrossrefMappingTest { } + @Test + def testLicenseEmbargoDateTime() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_datetime.json")).mkString + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json new file mode 100644 index 000000000..c84e16350 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json @@ -0,0 +1,1538 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": "Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." +}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +], + "date-time": "2021-05-17T15:08:12Z" +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file From 59530a14fb747b3c29a2fa36dcd2c5fa0e13eebe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 12:34:35 +0200 Subject: [PATCH 058/287] DoiBoost AccessRigh #4362 - set BestAccessRight with the ususal comparator --- .../doiboost/DoiBoostMappingUtil.scala | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index b0c6e009b..149d8ee5c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -258,17 +258,18 @@ object DoiBoostMappingUtil { i.setHostedby(hb) }) - val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) - if (ar.nonEmpty) { - if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ - val ar = getOpenAccessQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) - } - else { - val ar = getRestrictedQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) - } - } + publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) +// val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) +// if (ar.nonEmpty) { +// if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ +// val ar = getOpenAccessQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// } +// else { +// val ar = getRestrictedQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// } +// } publication } From 662c396354d63732d95dba3055ceaccb0fe6a526 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 12:41:14 +0200 Subject: [PATCH 059/287] duplicate the number of partitions in ConvertCrossrefToOaf --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f845d97f3..34ca58344 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -99,7 +99,7 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 5e5f65a3c313bbd985a9504b657251da43fb525c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 19 Jul 2021 15:56:55 +0200 Subject: [PATCH 060/287] contents mapped from the stores with 'claim' interpretation will not change their identifier along their way towards the graph --- .../raw/AbstractMdRecordToOafMapper.java | 24 +++++++++++++++---- .../raw/GenerateEntitiesApplication.java | 6 +++-- .../dhp/oa/graph/raw/OafToOafMapper.java | 5 ++++ .../dhp/oa/graph/raw/OdfToOafMapper.java | 5 ++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 1e80dfd46..cbaadf85c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -71,6 +71,8 @@ public abstract class AbstractMdRecordToOafMapper { private final boolean shouldHashId; + private final boolean forceOriginalId; + protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; @@ -98,11 +100,20 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible, + final boolean shouldHashId, final boolean forceOriginalId) { + this.vocs = vocs; + this.invisible = invisible; + this.shouldHashId = shouldHashId; + this.forceOriginalId = forceOriginalId; + } + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId) { this.vocs = vocs; this.invisible = invisible; this.shouldHashId = shouldHashId; + this.forceOriginalId = false; } public List processMdRecord(final String xml) { @@ -190,10 +201,15 @@ public abstract class AbstractMdRecordToOafMapper { final long lastUpdateTimestamp) { final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - final String id = IdentifierFactory.createIdentifier(entity, shouldHashId); - if (!id.equals(entity.getId())) { - entity.getOriginalId().add(entity.getId()); - entity.setId(id); + + if (!forceOriginalId) { + final String id = IdentifierFactory.createIdentifier(entity, shouldHashId); + if (!id.equals(entity.getId())) { + final Set originalId = Sets.newHashSet(entity.getOriginalId()); + originalId.add(entity.getId()); + entity.setOriginalId(Lists.newArrayList(originalId)); + entity.setId(id); + } } final List oafs = Lists.newArrayList(entity); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index fcd6f459a..bbfb7429f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -163,11 +163,13 @@ public class GenerateEntitiesApplication { switch (type.toLowerCase()) { case "oaf-store-cleaned": - case "oaf-store-claim": return new OafToOafMapper(vocs, false, shouldHashId).processMdRecord(s); + case "oaf-store-claim": + return new OafToOafMapper(vocs, false, shouldHashId, true).processMdRecord(s); case "odf-store-cleaned": - case "odf-store-claim": return new OdfToOafMapper(vocs, false, shouldHashId).processMdRecord(s); + case "odf-store-claim": + return new OdfToOafMapper(vocs, false, shouldHashId, true).processMdRecord(s); case "oaf-store-intersection": return new OafToOafMapper(vocs, true, shouldHashId).processMdRecord(s); case "odf-store-intersection": diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 06aeab345..d753cddeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -27,6 +27,11 @@ import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; public class OafToOafMapper extends AbstractMdRecordToOafMapper { + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, + final boolean forceOrginalId) { + super(vocs, invisible, shouldHashId, forceOrginalId); + } + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId) { super(vocs, invisible, shouldHashId); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index b7400873b..7925a7826 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -22,6 +22,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; + public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, + final boolean forceOrginalId) { + super(vocs, invisible, shouldHashId, forceOrginalId); + } + public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId) { super(vocs, invisible, shouldHashId); } From 5947cddafc11a26026a2a4cf1906ce395348b0c5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 19 Jul 2021 17:52:24 +0200 Subject: [PATCH 061/287] adding record identifier among the originalIds regardless of what IdentifierFactory produces --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index cbaadf85c..03c3eeb3c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -202,12 +202,13 @@ public abstract class AbstractMdRecordToOafMapper { final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); + final Set originalId = Sets.newHashSet(entity.getOriginalId()); + originalId.add(entity.getId()); + entity.setOriginalId(Lists.newArrayList(originalId)); + if (!forceOriginalId) { final String id = IdentifierFactory.createIdentifier(entity, shouldHashId); if (!id.equals(entity.getId())) { - final Set originalId = Sets.newHashSet(entity.getOriginalId()); - originalId.add(entity.getId()); - entity.setOriginalId(Lists.newArrayList(originalId)); entity.setId(id); } } From b420b11ed3c35182aaced4c9df378d2eb3ed0ec8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 18:16:23 +0200 Subject: [PATCH 062/287] duplicate the number of partitions in ProcessMag --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 34ca58344..499e0e0bb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -124,7 +124,7 @@ --executor-memory=${sparkExecutorIntersectionMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 77e8c6c7f7688078bb459da07566a6d75f03dccd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 20 Jul 2021 11:51:33 +0200 Subject: [PATCH 063/287] filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML searialization procedure --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 86bbae99e..a985d2371 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,6 +16,7 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable { .getOriginalId() .stream() .filter(Objects::nonNull) + .filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR)) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .collect(Collectors.toList())); } From 906995847955881606e357abd4ebbd244f149eff Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 20 Jul 2021 19:31:43 +0200 Subject: [PATCH 064/287] tests for enermaps --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 25 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/enermaps.xml | 72 +++++++++++++++++++ .../oa/provision/XmlRecordFactoryTest.java | 29 ++++++++ .../eu/dnetlib/dhp/oa/provision/enermaps.json | 1 + 4 files changed, 127 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5b229a625..495dc4a04 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -567,6 +567,31 @@ public class MappersTest { assertNotNull(d.getInstance().get(0).getUrl()); } + @Test + void testEnermaps() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + + final Dataset d = (Dataset) list.get(0); + + assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); + assertEquals(1, d.getAuthor().size()); + assertEquals(1, d.getInstance().size()); + assertNotNull(d.getInstance().get(0).getUrl()); + assertNotNull(d.getContext()); + assertTrue(StringUtils.isNotBlank(d.getContext().get(0).getId())); + assertEquals("enermaps::selection::tgs00004", d.getContext().get(0).getId()); + } + @Test void testClaimFromCrossref() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml new file mode 100644 index 000000000..362b40c85 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml @@ -0,0 +1,72 @@ + + + + enermaps____::04149ee428d07360314c2cb3ba95d41e + tgs00004 + 2021-07-20T18:43:12.096+02:00 + enermaps____ + + + + https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004 + + + Statistical Office of the European Union (Eurostat) + + + + + Regional GDP + + + Statistical Office of the European Union (Eurostat) + 2020 + + 2020-10-07 + + + + OPEN + Creative Commons Attribution 4.0 International + + + GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy. + + 0021 + 2020-10-07 + OPEN + Creative Commons Attribution 4.0 International + + + + + + + + + https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da..a5a1563aa 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,6 +7,8 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; +import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -131,4 +133,31 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); } + + @Test + public void testEnermapsRecord() throws IOException, DocumentException { + + String contextmap = "" + + ""+ + ""+ + ""; + + ContextMapper contextMapper = ContextMapper.fromXml(contextmap); + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); + + Dataset d = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + + JoinedEntity je = new JoinedEntity<>(d); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); + assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id")); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json new file mode 100644 index 000000000..dcd4c2ee1 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626800904248,"id":"50|enermaps____::04149ee428d07360314c2cb3ba95d41e","originalId":["50|enermaps____::04149ee428d07360314c2cb3ba95d41e","tgs00004"],"pid":[],"dateofcollection":"2021-07-20T18:43:12.096+02:00","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-07-20T18:43:12.096+02:00","altered":true,"baseURL":"https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite","identifier":"","datestamp":"","metadataNamespace":""}},"measures":null,"author":[{"fullname":"Statistical Office of the European Union (Eurostat)","name":"","surname":"","rank":1,"pid":[],"affiliation":[]}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"\n Regional GDP\n ","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2020-10-07","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Statistical Office of the European Union (Eurostat)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"id":"enermaps::selection::tgs00004","dataInfo":[{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}]}],"externalReference":[],"instance":[{"license":{"value":"Creative Commons Attribution 4.0 International","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004"],"distributionlocation":null,"collectedfrom":{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"storagedate":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} From 1a5b114906107df32c0e2df33dbd2b1e9c34f871 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jul 2021 12:00:23 +0200 Subject: [PATCH 065/287] DoiBoost AccessRigh #4362 - refactoring --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 149d8ee5c..686a2f1f1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -249,8 +249,8 @@ object DoiBoostMappingUtil { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) } - val ar = getOpenAccessQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// val ar = getOpenAccessQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) } else { hb = ModelConstants.UNKNOWN_REPOSITORY From 63553a76b3a9e491672c98177be1883a91035fd8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jul 2021 12:01:48 +0200 Subject: [PATCH 066/287] added code to download gold issn list from unibi --- dhp-common/pom.xml | 10 ++ .../project/utils/CSVParser.java | 7 +- .../actionmanager/project/utils/ReadCSV.java | 16 +- .../project/utils/ReadExcel.java | 2 +- .../dhp/actionmanager/project/parameters.json | 5 + dhp-workflows/dhp-doiboost/pom.xml | 7 +- .../main/java/eu/dnetlib/doiboost/GetCSV.java | 37 +++++ .../eu/dnetlib/doiboost/UnibiGoldModel.java | 151 ++++++++++++++++++ .../download_unibi_issn_gold_parameters.json | 39 +++++ .../preprocess/oozie_app/workflow.xml | 14 ++ .../eu/dnetlib/dhp/doiboost/GetCSVTest.java | 43 +++++ .../dhp/doiboost/issn_gold_oa_version_4.csv | 73 +++++++++ .../oa/provision/utils/XmlRecordFactory.java | 2 +- 13 files changed, 398 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf35..b32039e32 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -112,6 +112,16 @@ eu.dnetlib.dhp dhp-schemas + + + org.apache.commons + commons-csv + 1.8 + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 8bdce903b..1d839bec5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -16,10 +16,15 @@ import org.apache.commons.lang.reflect.FieldUtils; public class CSVParser { public List parse(String csvFile, String classForName) + throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { + return parse(csvFile, classForName, ';'); + } + + public List parse(String csvFile, String classForName, char delimiter) throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { final CSVFormat format = CSVFormat.EXCEL .withHeader() - .withDelimiter(';') + .withDelimiter(delimiter) .withQuote('"') .withTrim(); List ret = new ArrayList<>(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d..f9118350f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -6,6 +6,7 @@ import java.io.Closeable; import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -29,6 +30,7 @@ public class ReadCSV implements Closeable { private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; + private final char delimiter; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -44,19 +46,23 @@ public class ReadCSV implements Closeable { final String hdfsPath = parser.get("hdfsPath"); final String hdfsNameNode = parser.get("hdfsNameNode"); final String classForName = parser.get("classForName"); - - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { + Optional delimiter = Optional.ofNullable(parser.get("delimiter")); + char del = ';'; + if (delimiter.isPresent()) + del = delimiter.get().charAt(0); + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) { log.info("Getting CSV file..."); readCSV.execute(classForName); } + } public void execute(final String classForName) throws Exception { CSVParser csvParser = new CSVParser(); csvParser - .parse(csvFile, classForName) + .parse(csvFile, classForName, delimiter) .stream() .forEach(p -> write(p)); @@ -70,7 +76,8 @@ public class ReadCSV implements Closeable { public ReadCSV( final String hdfsPath, final String hdfsNameNode, - final String fileURL) + final String fileURL, + char delimiter) throws Exception { this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); @@ -85,6 +92,7 @@ public class ReadCSV implements Closeable { this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); + this.delimiter = delimiter; } protected void write(final Object p) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 5ce0a681c..a13d9b791 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -31,7 +31,7 @@ public class ReadExcel implements Closeable { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - ReadCSV.class + ReadExcel.class .getResourceAsStream( "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json index b6c9c94b9..9ccb70a9f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -28,6 +28,11 @@ "paramLongName" : "sheetName", "paramDescription" : "the name of the sheet in case the file is excel", "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false } diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a2..ea8832754 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -82,7 +82,12 @@ org.apache.commons commons-text - + + eu.dnetlib.dhp + dhp-aggregation + 1.2.4-SNAPSHOT + compile + diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java new file mode 100644 index 000000000..00b6b184b --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java @@ -0,0 +1,37 @@ + +package eu.dnetlib.doiboost; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) { + + log.info("Getting CSV file..."); + readCSV.execute(classForName); + + } + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java new file mode 100644 index 000000000..e5bd49ada --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java @@ -0,0 +1,151 @@ + +package eu.dnetlib.doiboost; + +import java.io.Serializable; + +public class UnibiGoldModel implements Serializable { + private String ISSN; + private String ISSN_L; + private String ISSN_IN_DOAJ; + private String ISSN_IN_ROAD; + private String ISSN_IN_PMC; + private String ISSN_IN_OAPC; + private String ISSN_IN_WOS; + private String ISSN_IN_SCOPUS; + private String JOURNAL_IN_DOAJ; + private String JOURNAL_IN_ROAD; + private String JOURNAL_IN_PMC; + private String JOURNAL_IN_OAPC; + private String JOURNAL_IN_WOS; + private String JOURNAL_IN_SCOPUS; + private String TITLE; + private String TITLE_SOURCE; + + public String getISSN() { + return ISSN; + } + + public void setISSN(String ISSN) { + this.ISSN = ISSN; + } + + public String getISSN_L() { + return ISSN_L; + } + + public void setISSN_L(String ISSN_L) { + this.ISSN_L = ISSN_L; + } + + public String getISSN_IN_DOAJ() { + return ISSN_IN_DOAJ; + } + + public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) { + this.ISSN_IN_DOAJ = ISSN_IN_DOAJ; + } + + public String getISSN_IN_ROAD() { + return ISSN_IN_ROAD; + } + + public void setISSN_IN_ROAD(String ISSN_IN_ROAD) { + this.ISSN_IN_ROAD = ISSN_IN_ROAD; + } + + public String getISSN_IN_PMC() { + return ISSN_IN_PMC; + } + + public void setISSN_IN_PMC(String ISSN_IN_PMC) { + this.ISSN_IN_PMC = ISSN_IN_PMC; + } + + public String getISSN_IN_OAPC() { + return ISSN_IN_OAPC; + } + + public void setISSN_IN_OAPC(String ISSN_IN_OAPC) { + this.ISSN_IN_OAPC = ISSN_IN_OAPC; + } + + public String getISSN_IN_WOS() { + return ISSN_IN_WOS; + } + + public void setISSN_IN_WOS(String ISSN_IN_WOS) { + this.ISSN_IN_WOS = ISSN_IN_WOS; + } + + public String getISSN_IN_SCOPUS() { + return ISSN_IN_SCOPUS; + } + + public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) { + this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS; + } + + public String getJOURNAL_IN_DOAJ() { + return JOURNAL_IN_DOAJ; + } + + public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) { + this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ; + } + + public String getJOURNAL_IN_ROAD() { + return JOURNAL_IN_ROAD; + } + + public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) { + this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD; + } + + public String getJOURNAL_IN_PMC() { + return JOURNAL_IN_PMC; + } + + public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) { + this.JOURNAL_IN_PMC = JOURNAL_IN_PMC; + } + + public String getJOURNAL_IN_OAPC() { + return JOURNAL_IN_OAPC; + } + + public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) { + this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC; + } + + public String getJOURNAL_IN_WOS() { + return JOURNAL_IN_WOS; + } + + public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) { + this.JOURNAL_IN_WOS = JOURNAL_IN_WOS; + } + + public String getJOURNAL_IN_SCOPUS() { + return JOURNAL_IN_SCOPUS; + } + + public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) { + this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS; + } + + public String getTITLE() { + return TITLE; + } + + public void setTITLE(String TITLE) { + this.TITLE = TITLE; + } + + public String getTITLE_SOURCE() { + return TITLE_SOURCE; + } + + public void setTITLE_SOURCE(String TITLE_SOURCE) { + this.TITLE_SOURCE = TITLE_SOURCE; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json new file mode 100644 index 000000000..9ccb70a9f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json @@ -0,0 +1,39 @@ +[ + + { + "paramName": "fu", + "paramLongName" : "fileURL", + "paramDescription" : "the url of the file to download", + "paramRequired" : true + }, + { + "paramName": "hp", + "paramLongName" : "hdfsPath", + "paramDescription" : "where to save the file", + "paramRequired" : true + }, + { + "paramName": "hnn", + "paramLongName" : "hdfsNameNode", + "paramDescription" : "the name node", + "paramRequired" : true + }, + { + "paramName": "cfn", + "paramLongName" : "classForName", + "paramDescription" : "the name of the class to deserialize the csv to", + "paramRequired" : true +}, { + "paramName": "sn", + "paramLongName" : "sheetName", + "paramDescription" : "the name of the sheet in case the file is excel", + "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false +} + + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 52f958d4d..4de1a2185 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,6 +63,7 @@ + ${wf:conf('resumeFrom') eq 'DownloadGoldIssn'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} @@ -76,6 +77,19 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + eu.dnetlib.doiboost.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiGoldIssnFileURL} + --hdfsPath${hdfsPath} + --classForNameeu.dnetlib.doiboost.UnibiGoldModel + + + + + + ${jobTracker} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java new file mode 100644 index 000000000..6cfc90736 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.doiboost; + +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; + +public class GetCSVTest { + + @Test + public void readUnibiGoldTest() throws Exception { + + String programmecsv = IOUtils + .toString( + getClass() + .getClassLoader() + .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv")); + + CSVParser csvParser = new CSVParser(); + + List pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ','); + + Assertions.assertEquals(72, pl.size()); + +// ObjectMapper OBJECT_MAPPER = new ObjectMapper(); +// +// pl.forEach(res -> { +// try { +// System.out.println(OBJECT_MAPPER.writeValueAsString(res)); +// } catch (JsonProcessingException e) { +// e.printStackTrace(); +// } +// }); + + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv new file mode 100644 index 000000000..6d4b6cdcb --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv @@ -0,0 +1,73 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" +"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ" +"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD" +"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ" +"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ" +"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva Teológica","DOAJ" +"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ" +"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ" +"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ" +"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ" +"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ" +"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD" +"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ" +"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD" +"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ" +"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ" +"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ" +"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ" +"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ" +"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD" +"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ" +"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD" +"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. Electrical engineering","ROAD" +"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ" +"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD" +"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ" +"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ" +"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ" +"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ" +"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD" +"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD" +"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ" +"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ" +"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD" +"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ" +"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ" +"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ" diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..2c8240290 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { From ffdb2a3ea39823e04062d05f4ce50d6e0f9cf0f0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Jul 2021 11:55:55 +0200 Subject: [PATCH 067/287] [cleaning] fixed filtering function for missing titles --- .../eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index a75cc52e6..e5181b111 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -98,7 +98,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { Result r = (Result) value; - if (Objects.nonNull(r.getTitle()) && r.getTitle().isEmpty()) { + if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) { return false; } From 5b6844b969ba4bff22b2f38e9aaf414aa6ac5056 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Jul 2021 18:14:37 +0200 Subject: [PATCH 068/287] mapping funding relations from Datacite should be done according to the actual result identifier --- .../actionmanager/datacite/DataciteToOAFTransformation.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 0cdf0accb..045927bed 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -532,11 +532,11 @@ object DataciteToOAFTransformation { JField("awardUri", JString(awardUri)) <- fundingReferences } yield awardUri + result.setId(IdentifierFactory.createIdentifier(result)) var relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null) - fix_figshare(result) - result.setId(IdentifierFactory.createIdentifier(result)) + if (result.getId == null) return List() From 3920c69bc8e050ada6284f8d40e687e5dd9b1761 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Sun, 25 Jul 2021 09:51:24 +0200 Subject: [PATCH 069/287] change implementation of resolve Relation to generate jsonRdd in output --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 ++- .../dhp/sx/graph/SparkResolveRelation.scala | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcf..3f27b9442 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,7 +38,8 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class GroupEntitiesSparkJob { +public class +GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 0d0dc4159..82bf3c50e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -1,8 +1,10 @@ package eu.dnetlib.dhp.sx.graph +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql._ @@ -40,7 +42,9 @@ object SparkResolveRelation { extractPidResolvedTableFromJsonRDD(spark, entityPath, workingPath) - val rPid:Dataset[(String,String)] = spark.read.load(s"$workingPath/resolvedPid").as[(String,String)] + val mappper = new ObjectMapper() + + val rPid:Dataset[(String,String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String,String)] val relationDs:Dataset[(String,Relation)] = spark.read.load(relationPath).as[Relation].map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) @@ -68,6 +72,11 @@ object SparkResolveRelation { .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") + + spark.read.load(s"$workingPath/relation_resolved").as[Relation] + .map(r => mappper.writeValueAsString(r)) + .rdd.saveAsTextFile(s"$workingPath/relation", classOf[GzipCodec]) + } @@ -102,7 +111,7 @@ object SparkResolveRelation { .map(s => s._2) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedPid") + .save(s"$workingPath/relationResolvedPid") } @@ -124,7 +133,7 @@ object SparkResolveRelation { .map(s => s._2) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedPid") + .save(s"$workingPath/relationResolvedPid") } def convertPidToDNETIdentifier(pid:String, pidType: String):String = { From 8fac10c91e109fd3ea74d4534b68df468618bf1a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Sun, 25 Jul 2021 11:15:37 +0200 Subject: [PATCH 070/287] fixed defintion wf of creation final infospace of scholexplorer --- .../sx/graph/SparkConvertRDDtoDataset.scala | 58 +++++++++++++------ .../sx/graph/SparkCreateSummaryObject.scala | 5 +- .../dhp/sx/graph/SparkResolveRelation.scala | 4 +- .../graph/finalGraph/oozie_app/workflow.xml | 4 +- 4 files changed, 48 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 2cd176dee..cb41d6134 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.sx.graph -import com.cloudera.com.fasterxml.jackson.databind.ObjectMapper + +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -9,12 +10,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { - val entities = List( - ("dataset", classOf[OafDataset]), - ("otherresearchproduct", classOf[OtherResearchProduct]), - ("publication", classOf[Publication]), - ("software", classOf[Software]) - ) + val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() @@ -29,15 +25,43 @@ object SparkConvertRDDtoDataset { val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - val mapper = new ObjectMapper() - implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + val t = parser.get("targetPath") + log.info(s"targetPath -> $t") + + val entityPath = s"$t/entities" + val relPath = s"$t/relation" + val mapper = new ObjectMapper() + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + + + log.info("Converting dataset") + val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") + + + log.info("Converting publication") + val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") + + log.info("Converting software") + val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") + + log.info("Converting otherresearchproduct") + val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") + + + log.info("Converting Relation") + + + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])) + spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") + - entities.foreach{ - e => - val rdd =spark.sparkContext.textFile(s"$sourcePath/${e._1}").map(s => mapper.readValue(s, e._2)) - spark.createDataset(rdd).as[Result].write.mode(SaveMode.Overwrite).save(s"$targetPath/${e._1}") - } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index a66da3e6d..ac189b6ba 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Result +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils @@ -29,11 +29,12 @@ object SparkCreateSummaryObject { log.info(s"targetPath -> $targetPath") implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result] + val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Oaf].filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 82bf3c50e..b2fddec20 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -57,10 +57,10 @@ object SparkResolveRelation { currentRelation }.write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedSource") + .save(s"$workingPath/relationResolvedSource") - val relationSourceResolved:Dataset[(String,Relation)] = spark.read.load(s"$workingPath/resolvedSource").as[Relation].map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) + val relationSourceResolved:Dataset[(String,Relation)] = spark.read.load(s"$workingPath/relationResolvedSource").as[Relation].map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map{ m => val targetResolved = m._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index 4e601bff3..d8eb1fc80 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ -35,7 +35,7 @@ --masteryarn --sourcePath${sourcePath} - --targetPath${targetPath}/entities + --targetPath${targetPath} @@ -87,7 +87,7 @@ --masteryarn --summaryPath${targetPath}/provision/summaries --targetPath${targetPath}/provision/scholix - --relationPath${sourcePath}/relation_resolved + --relationPath${targetPath}/relation From 848aabbb6cef47e941f739f475521011dfee6be9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Sun, 25 Jul 2021 12:06:41 +0200 Subject: [PATCH 071/287] minor fix --- .../main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala | 1 + .../java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index ba483bfb2..0a7fc18fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -42,6 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] + .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index ac189b6ba..0970375f5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -34,7 +34,7 @@ object SparkCreateSummaryObject { implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Oaf].filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) + val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) From eb07f7f40f76b9a9c0a3f72549908c4432779558 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 27 Jul 2021 12:27:26 +0200 Subject: [PATCH 072/287] Hosted By Map --- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- .../eu/dnetlib/dhp/transform/input_itgv4.xml | 2 +- ...c_cleaning_OPENAIREplus_compliant_hal_orig | 6 +- ...e_datacite_ExchangeLandingpagePid_orig.xsl | 20 +-- ...enaire_datacite_ExchangeLandingpagePid.xsl | 14 +- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- .../crossref_mapping.csv | 4 +- .../dhp/doiboost/issn_gold_oa_version_4.csv | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 5 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ++++++ .../dhp/oa/graph/hostebymap/Constants.java | 15 ++ .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ++++++++++++ .../SparkPrepareHostedByMapData.scala | 158 ++++++++++++++++++ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ++++++ .../hostebymap/model/UnibiGoldModel.java | 44 +++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 59 +++++++ .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 111 ++++++++++++ .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 2 +- .../dhp/oa/graph/hostedbymap/datasource.json | 10 ++ .../graph/hostedbymap/doaj_transformed.json | 25 +++ .../dhp/oa/graph/hostedbymap/unibiGold.csv | 37 ++++ .../graph/hostedbymap/unibi_transformed.json | 29 ++++ .../dhp/oa/graph/raw/oaf_claim_dedup.xml | 4 +- .../dhp/sx/graph/bio/pubmed/pubmed.xml | 46 ++--- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- 25 files changed, 764 insertions(+), 53 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca..30138b5bc 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml index 06325810b..b421f1342 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml @@ -31,7 +31,7 @@ https://pub.uni-bielefeld.de/record/1997560.json - 0016-9056 + 0016-9056 ger Friedrich diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index d4eb71dc4..505f56294 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -134,9 +134,9 @@ oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-orig oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; // journal data -// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...) -$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))"; -//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))"; +// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the issn (as e.g. print/online/...) +$varISSN = xpath:"//dc:source[starts-with(., 'issn:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'issn:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'issn:')), 1, 4))))"; +//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'issn:'))"; $varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))"; oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";); diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl index 3cfaec80b..f80c91a6a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl @@ -169,7 +169,7 @@ @@ -283,7 +283,7 @@ - + - + - + @@ -793,9 +793,9 @@ - + - + @@ -844,7 +844,7 @@ - + - + - + @@ -594,9 +594,9 @@ - + - + diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca..30138b5bc 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv index 6a5fc3f87..0c1fd9e64 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv @@ -36,10 +36,10 @@ article-number,String,No,,N/A, published-print,Partial Date,No,Date on which the work was published in print,"Result/relevantDate with Qualifier(""published-print"", ""dnet:dataCite_date"")", published-online,Partial Date,No,Date on which the work was published online,"Result/relevantDate with Qualifier(""published-online"", ""dnet:dataCite_date"")", subject,Array of String,No,"Subject category names, a controlled vocabulary from Sci-Val. Available for most journal articles","Result/subject with Qualifier(""keywords"", ""dnet:subject_classification_typologies""). ","Future improvements: map the controlled vocabulary instead of using the generic ""keywords"" qualifier" -ISSN,Array of String,No,,"Publication/Journal/issn +issn,Array of String,No,,"Publication/Journal/issn Publication/Journal/lissn Publication/Journal/eissn",The mapping depends on the value of issn-type -issn-type,Array of ISSN with Type,No,List of ISSNs with ISSN type information,N/A,Its value guides the setting of the properties in Journal (see row above) +issn-type,Array of issn with Type,No,List of ISSNs with issn type information,N/A,Its value guides the setting of the properties in Journal (see row above) ISBN,Array of String,No,,Publication/source,"In case of Book We can map ISBN and container title on Publication/source using this syntax container-title + ""ISBN: "" + ISBN" archive,Array of String,No,,N/A, license,Array of License,No,,Result/Instance/License, diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv index 6d4b6cdcb..ebde1bf18 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv @@ -1,4 +1,4 @@ -"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"issn","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" "0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" "0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" "0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 19febb9ed..665c01276 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -122,6 +122,11 @@ org.json4s json4s-jackson_2.11 + + com.opencsv + opencsv + 5.5 + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala new file mode 100644 index 000000000..d98418339 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala @@ -0,0 +1,54 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} + + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (!s1.equals("")){ + return s1} + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java new file mode 100644 index 000000000..b07e33cd1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java @@ -0,0 +1,15 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap; + +public class Constants { + + + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java new file mode 100644 index 000000000..d397886a3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; +import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.conf.Configuration; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + + URLConnection connection = new URL(fileURL).openConnection(); + connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if(shouldReplace){ + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while((line = in.readLine())!= null){ + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + + + writer.close(); + in.close(); + if(shouldReplace){ + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + + } + + + + +} + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala new file mode 100644 index 000000000..55b6e36cd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala @@ -0,0 +1,158 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Organization, Publication, Relation} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} +import org.json4s.jackson.Serialization.write + +import scala.collection.mutable.ListBuffer + +object SparkPrepareHostedByMapData { + + case class HostedByInfo(id: Option[String], officialname: String, journal_id: String, provenance : String, id_type: String) {} + + implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + implicit val mapEncoderHBI: Encoder[HostedByInfo] = Encoders.product[HostedByInfo] + + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + val openaire: HostedByInfo = input._1._1 + val doaj: HostedByInfo = input._1._2 + val gold: HostedByInfo = input._2 + val isOpenAccess: Boolean = doaj == null && gold == null + + openaire.journal_id match { + case Constants.ISSN => return HostedByItemType(openaire.id.get, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => return HostedByItemType(openaire.id.get, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => return HostedByItemType(openaire.id.get, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + + // catch the default with a variable so you can print it + case whoa => return null + } + } + + def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { + implicit val formats = DefaultFormats + val serializedJSON:String = write(input) + + var hostedBy = new ListBuffer[String]() + if(!input.issn.equals("")){ + hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" + } + if(!input.eissn.equals("")){ + hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" + } + if(!input.lissn.equals("")){ + hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" + } + + hostedBy + + } + + + def readOADataset(input:String, spark: SparkSession): Dataset[HostedByInfo] = { + spark.read.textFile(input).as[Datasource].flatMap(ds => { + val lst = new ListBuffer[HostedByInfo]() + if (ds.getJournal == null) { + return null + } + val issn: String = ds.getJournal.getIssnPrinted + val issnl: String = ds.getJournal.getIssnOnline + val eissn: String = ds.getJournal.getIssnOnline + val id: String = ds.getId + val officialname: String = ds.getOfficialname.getValue + if (issn != null) { + lst += HostedByInfo(Some(id), officialname, issn, Constants.OPENAIRE, Constants.ISSN) + } + if (issnl != null) { + lst += HostedByInfo(Some(id), officialname, issnl, Constants.OPENAIRE, Constants.ISSNL) + } + if (eissn != null) { + lst += HostedByInfo(Some(id), officialname, eissn, Constants.OPENAIRE, Constants.EISSN) + } + lst + }).filter(i => i != null) + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + import spark.implicits._ + + val datasourcePath = parser.get("datasourcePath") + val workingDirPath = parser.get("workingPath") + + + + + logger.info("Getting the Datasources") + + val doajDataset: Dataset[DOAJModel] = spark.read.load(workingDirPath + "/doaj").as[DOAJModel] + val unibiDataset: Dataset[UnibiGoldModel] = spark.read.load(datasourcePath).as[UnibiGoldModel] + + val oa: Dataset[HostedByInfo] = readOADataset(datasourcePath, spark) + + val doaj: Dataset[HostedByInfo] = doajDataset.flatMap(doaj => { + val lst = new ListBuffer[HostedByInfo]() + val issn: String = doaj.getIssn + val eissn: String = doaj.getEissn + val officialname: String = doaj.getJournalTitle + if (issn != null) { + lst += HostedByInfo(null, officialname, issn, Constants.DOAJ, Constants.ISSN) + } + if (eissn != null) { + lst += HostedByInfo(null, officialname, eissn, Constants.DOAJ, Constants.EISSN) + } + lst + }) + + val gold: Dataset[HostedByInfo] = unibiDataset.flatMap(gold => { + val lst = new ListBuffer[HostedByInfo]() + val issn: String = gold.getIssn + val issnl: String = gold.getIssn_l + val officialname: String = gold.getTitle + if (issn != null) { + lst += HostedByInfo(null, officialname, issn, Constants.UNIBI, Constants.ISSN) + } + if (issnl != null) { + lst += HostedByInfo(null, officialname, issnl, Constants.UNIBI, Constants.ISSNL) + } + lst + }) + + Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") + .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) + .filter(i => i != null)) + .flatMap(toHostedByMap) + // .map(i => (i.id,i)) + // .groupByKey(_._1) + // .agg(hostedByAggregator.toColumn) + // .map(p => p._2) + .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") + + + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java new file mode 100644 index 000000000..fe1d14a76 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn ; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java new file mode 100644 index 000000000..309f74eea --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java @@ -0,0 +1,44 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap.model; + +import com.opencsv.bean.CsvBindByName; + +import java.io.Serializable; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala new file mode 100644 index 000000000..657f20fce --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -0,0 +1,59 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import java.sql.Timestamp + +import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData +import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData.HostedByInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats +import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} +import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +class TestPreprocess { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + @Test + def readDatasource():Unit = { + + + import org.apache.spark.sql.Encoders + implicit val formats = DefaultFormats + import org.json4s.jackson.Serialization.write + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + + val schema = Encoders.product[HostedByInfo] + + spark.read.textFile(path).foreach(r => println(mapper.writeValueAsString(r))) + +// SparkPrepareHostedByMapData.readOADataset(path, spark) +// .foreach(r => println(write(r))) + + + spark.close() + } + + + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java new file mode 100644 index 000000000..01c70502c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; +import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; +import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; +import org.junit.jupiter.api.Test; + +import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; + +public class TestReadCSV { + + @Test + public void testCSVUnibi() throws FileNotFoundException { + + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); + + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); + + ObjectMapper mapper = new ObjectMapper(); + + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + + + } + + @Test + public void testCSVUrlUnibi() throws IOException { + + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> + + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + + ); + } + + @Test + public void testCSVUrlDOAJ() throws IOException { + + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while((line = in.readLine())!= null){ + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); + + + + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> + + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + + ); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c..4db5fd463 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json new file mode 100644 index 000000000..15e1dcc45 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -0,0 +1,10 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json new file mode 100644 index 000000000..9cec80eb4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -0,0 +1,25 @@ +{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} +{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} +{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} +{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} +{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} +{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} +{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} +{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} +{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} +{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv new file mode 100644 index 000000000..eb5d93451 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv @@ -0,0 +1,37 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json new file mode 100644 index 000000000..d4acba4a9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -0,0 +1,29 @@ +{"issn":"2502-731X","issn_l":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","title_source":"ROAD"} +{"issn":"2502-7409","issn_l":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","title_source":"ROAD"} +{"issn":"2502-7433","issn_l":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","title_source":"ROAD"} +{"issn":"2502-745X","issn_l":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","title_source":"ROAD"} +{"issn":"2502-7549","issn_l":"2502-7549","title":"ELang journal (An English Education journal)","title_source":"ROAD"} +{"issn":"2423-3633","issn_l":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","title_source":"ROAD"} +{"issn":"2423-5563","issn_l":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","title_source":"ROAD"} +{"issn":"1735-434X","issn_l":"1735-434X","title":"Iranian journal of animal biosystematics.","title_source":"ROAD"} +{"issn":"2423-4435","issn_l":"2008-6113","title":"Majallah-i jangal-i Īrān.","title_source":"ROAD"} +{"issn":"2423-4575","issn_l":"2423-4575","title":"Ābziyān-i zinatī.","title_source":"ROAD"} +{"issn":"2423-4974","issn_l":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","title_source":"ROAD"} +{"issn":"2380-0607","issn_l":"2380-0607","title":"AIHM journal club.","title_source":"ROAD"} +{"issn":"1085-4568","issn_l":"1085-4568","title":"Frontiers.","title_source":"ROAD"} +{"issn":"2380-8845","issn_l":"2380-8845","title":"˜The œjournal of contemporary archival studies.","title_source":"ROAD"} +{"issn":"2381-1803","issn_l":"2381-1803","title":"International journal of complementary & alternative medicine.","title_source":"ROAD"} +{"issn":"2381-2478","issn_l":"2381-2478","title":"Palapala.","title_source":"ROAD"} +{"issn":"2382-5170","issn_l":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","title_source":"ROAD"} +{"issn":"2382-9737","issn_l":"2382-9737","title":"Majallah-i salāmat va bihdāsht","title_source":"ROAD"} +{"issn":"2382-977X","issn_l":"2382-977X","title":"UCT journal of research in science ,engineering and technology","title_source":"ROAD"} +{"issn":"2382-9974","issn_l":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","title_source":"ROAD"} +{"issn":"2227-4782","issn_l":"2227-4782","title":"Problemi endokrinnoï patologìï.","title_source":"ROAD"} +{"issn":"2685-0079","issn_l":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","title_source":"ROAD"} +{"issn":"2574-0075","issn_l":"2574-0075","title":"Hypermedia magazine.","title_source":"ROAD"} +{"issn":"2574-0296","issn_l":"2574-0296","title":"˜The œmuseum review.","title_source":"ROAD"} +{"issn":"2574-0334","issn_l":"2574-0334","title":"Bioactive compounds in health and disease.","title_source":"ROAD"} +{"issn":"2574-108X","issn_l":"2574-108X","title":"Journal of computer science integration.","title_source":"ROAD"} +{"issn":"2574-254X","issn_l":"2574-254X","title":"Child and adolescent obesity.","title_source":"ROAD"} +{"issn":"2574-3325","issn_l":"2574-3325","title":"Journal of research on the college president.","title_source":"ROAD"} +{"issn":"2239-6101","issn_l":"2239-5938","title":"European journal of sustainable development.","title_source":"ROAD"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml index 95457fb70..b45fa1edb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml @@ -16,7 +16,7 @@ Doğuş Üniversitesi, Fen Edebiyat Fakültesi, Fizik Bölümü TR3959 urn:issn:1748-0221 - VOLUME=7;ISSUE=1;ISSN=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSUE=1;issn=1748-0221;TITLE=Journal of Instrumentation ATLAS Collaboration Mitsou, Vasiliki Fiorini, Luca Ros Martínez, Eduardo Castillo Giménez, María Victoria Fuster Verdú, Juan A. García García, Carmen Cabrera Urbán, Susana Martí García, Salvador Salt Cairols, José Lacasta Llácer, Carlos Valls Ferrer, @@ -30,7 +30,7 @@ interactions. Journal of Instrumentation, 7(1). doi: 10.1088/1748-0221/7/01/P01013. UC Santa Cruz: Retrieved from: http://www.escholarship.org/uc/item/05j2j2br Journal of Instrumentation, 7 - VOLUME=7;ISSN=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;issn=1748-0221;TITLE=Journal of Instrumentation 1748-0221 Journal of Instrumentation 7, P01013 (2012). doi:10.1088/1748-0221/7/01/P01013 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml index 22da07e29..06ebe97cd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml @@ -16,7 +16,7 @@
- 0006-2944 + 0006-2944 13 2 @@ -182,7 +182,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -314,7 +314,7 @@
- 0006-291X + 0006-291X 66 4 @@ -460,7 +460,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -644,7 +644,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -774,7 +774,7 @@
- 0006-291X + 0006-291X 66 4 @@ -934,7 +934,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -1614,7 +1614,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -2017,7 +2017,7 @@
- 0006-2952 + 0006-2952 24 17 @@ -2185,7 +2185,7 @@
- 1873-2968 + 1873-2968 24 17 @@ -2337,7 +2337,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2585,7 +2585,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2838,7 +2838,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3020,7 +3020,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3204,7 +3204,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3450,7 +3450,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3683,7 +3683,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3880,7 +3880,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4069,7 +4069,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4428,7 +4428,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4590,7 +4590,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4741,7 +4741,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4999,7 +4999,7 @@
- 0004-4172 + 0004-4172 25 9 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca..30138b5bc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 998b66855abcfe828f670586793b5dbefafa442d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 27 Jul 2021 15:11:37 +0200 Subject: [PATCH 073/287] updated assertions in eu.dnetlib.dhp.oa.graph.raw.MappersTest --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 58 ++++++++----------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 495dc4a04..aeeceaeb5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,17 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.lenient; - -import java.io.IOException; -import java.util.List; -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -20,22 +16,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.util.List; +import java.util.Optional; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -74,7 +60,7 @@ public class MappersTest { assertValidId(p.getId()); - assertEquals(1, p.getOriginalId().size()); + assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().contains("10.3897/oneeco.2.e13718")); assertValidId(p.getCollectedfrom().get(0).getKey()); @@ -261,8 +247,8 @@ public class MappersTest { final Relation r2 = (Relation) list.get(2); assertValidId(d.getId()); - assertEquals(1, d.getOriginalId().size()); - assertTrue(d.getOriginalId().contains("oai:zenodo.org:3234526")); + assertEquals(2, d.getOriginalId().size()); + assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); @@ -351,8 +337,11 @@ public class MappersTest { final Publication p = (Publication) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); + //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); @@ -413,7 +402,8 @@ public class MappersTest { assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); assertValidId(d.getId()); - assertTrue(d.getOriginalId().size() == 1); + assertEquals(2, d.getOriginalId().size()); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -688,8 +678,8 @@ public class MappersTest { final Dataset p = (Dataset) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("df76e73f-0483-49a4-a9bb-63f2f985574a", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("df76e73f-0483-49a4-a9bb-63f2f985574a"))); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); From 5aa7d16d1b1bf2f100081f491152db4fb7ac90a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 27 Jul 2021 15:11:37 +0200 Subject: [PATCH 074/287] updated assertions in eu.dnetlib.dhp.oa.graph.raw.MappersTest --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 58 ++++++++----------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5b229a625..c41a6c68c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,17 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.lenient; - -import java.io.IOException; -import java.util.List; -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -20,22 +16,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.util.List; +import java.util.Optional; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -74,7 +60,7 @@ public class MappersTest { assertValidId(p.getId()); - assertEquals(1, p.getOriginalId().size()); + assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().contains("10.3897/oneeco.2.e13718")); assertValidId(p.getCollectedfrom().get(0).getKey()); @@ -261,8 +247,8 @@ public class MappersTest { final Relation r2 = (Relation) list.get(2); assertValidId(d.getId()); - assertEquals(1, d.getOriginalId().size()); - assertTrue(d.getOriginalId().contains("oai:zenodo.org:3234526")); + assertEquals(2, d.getOriginalId().size()); + assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); @@ -351,8 +337,11 @@ public class MappersTest { final Publication p = (Publication) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); + //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); @@ -413,7 +402,8 @@ public class MappersTest { assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); assertValidId(d.getId()); - assertTrue(d.getOriginalId().size() == 1); + assertEquals(2, d.getOriginalId().size()); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -663,8 +653,8 @@ public class MappersTest { final Dataset p = (Dataset) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("df76e73f-0483-49a4-a9bb-63f2f985574a", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("df76e73f-0483-49a4-a9bb-63f2f985574a"))); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); From 825d9f02897c07f64880650ee6b58ccb4f3fcf2d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 27 Jul 2021 16:09:30 +0200 Subject: [PATCH 075/287] fixed datacite workflow starting from Importing delta --- .../datacite/DataciteToOAFTransformation.scala | 2 +- .../actionmanager/datacite/oozie_app/workflow.xml | 2 +- .../datacite/DataciteToOAFTest.scala | 15 ++++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 045927bed..cfdd98d30 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -367,7 +367,7 @@ object DataciteToOAFTransformation { result.setDateofcollection(ISO8601FORMAT.format(d)) - result.setDateoftransformation(ISO8601FORMAT.format(ts)) + result.setDateoftransformation(ISO8601FORMAT.format(d)) result.setDataInfo(dataInfo) val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 036178b37..021704f54 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -16,7 +16,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala index 0d10c41dc..a795a910d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala @@ -3,13 +3,14 @@ package eu.dnetlib.dhp.actionmanager.datacite import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.SerializationFeature - import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension +import java.text.SimpleDateFormat +import java.util.Locale import scala.io.Source @ExtendWith(Array(classOf[MockitoExtension])) @@ -22,6 +23,18 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ super.setUpVocabulary() } + + @Test + def testDateMapping:Unit = { + val inputDate = "2021-07-14T11:52:54+0000" + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val dt = ISO8601FORMAT.parse(inputDate) + println(dt.getTime) + + + } + + @Test def testMapping() :Unit = { val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString From d267dce52009efb3e7db40548fcc7f8337b18d70 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 27 Jul 2021 17:18:29 +0200 Subject: [PATCH 076/287] [raw_all] added extra workflow step for patching the identifiers in the relations, given an id mapping dataset --- .../graph/raw/PatchRelationsApplication.java | 117 ++++++++++++++++++ .../graph/raw/common/RelationIdMapping.java | 24 ++++ .../oa/graph/patch_relations_parameters.json | 26 ++++ .../oa/graph/raw_all/oozie_app/workflow.xml | 47 ++++++- 4 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java new file mode 100644 index 000000000..151f5ba3f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -0,0 +1,117 @@ +package eu.dnetlib.dhp.oa.graph.raw; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class PatchRelationsApplication { + + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional.ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new) + )); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); + + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); + + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); + + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } + + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + + final String relationPath = graphBasePath + "/relation"; + + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + + rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "full") + .filter((FilterFunction>) t -> Objects.nonNull(t._1())) + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)) + .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "full") + .filter((FilterFunction>) t -> Objects.nonNull(t._1())) + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark.read().textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java new file mode 100644 index 000000000..f251da8c3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.oa.graph.raw.common; + +public class RelationIdMapping { + + private String oldId; + + private String newId; + + public String getOldId() { + return oldId; + } + + public void setOldId(final String oldId) { + this.oldId = oldId; + } + + public String getNewId() { + return newId; + } + + public void setNewId(final String newId) { + this.newId = newId; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json new file mode 100644 index 000000000..178c2d69b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "g", + "paramLongName": "graphBasePath", + "paramDescription": "base graph path providing the set of relations to patch", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingDir", + "paramDescription": "intermediate storage location", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "idMappingPath", + "paramDescription": "dataset providing the old -> new identifier mapping", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 7f1ecb39f..e7320de3b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -100,6 +100,16 @@ a blacklist of nsprefixes (comma separeted) + + shouldPatchRelations + false + activates the relation patching phase, driven by the content in ${idMappingPath} + + + idMappingPath + + path pointing to the relations identifiers mapping dataset + sparkDriverMemory memory for driver process @@ -538,7 +548,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${workingDir}/graph_raw + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + From 52e2315ba270384704f6839c5c95c7cd6b68df95 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 10:23:00 +0200 Subject: [PATCH 077/287] removed trick for datasourcetypeui --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../dhp/oa/provision/XmlConverterJob.java | 73 +-- .../oa/provision/utils/XmlRecordFactory.java | 447 ++++++++---------- .../provision/input_params_xml_converter.json | 6 - .../dhp/oa/provision/oozie_app/workflow.xml | 5 - .../provision/IndexRecordTransformerTest.java | 36 +- .../oa/provision/XmlRecordFactoryTest.java | 70 ++- pom.xml | 2 +- 8 files changed, 278 insertions(+), 364 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..518f41120 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -42,61 +41,51 @@ public class XmlConverterJob { public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( XmlConverterJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); + final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String isLookupUrl = parser.get("isLookupUrl"); + final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - String otherDsTypeId = parser.get("otherDsTypeId"); - log.info("otherDsTypeId: {}", otherDsTypeId); - - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - convertToXml( - spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + }); } private static void convertToXml( - SparkSession spark, - String inputPath, - String outputPath, - ContextMapper contextMapper, - String otherDsTypeId) { + final SparkSession spark, + final String inputPath, + final String outputPath, + final ContextMapper contextMapper) { final XmlRecordFactory recordFactory = new XmlRecordFactory( prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, - otherDsTypeId); + schemaLocation); final List paths = HdfsSupport .listFiles(inputPath, spark.sparkContext().hadoopConfiguration()); @@ -116,16 +105,15 @@ public class XmlConverterJob { .mapToPair( (PairFunction, Text, Text>) t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) - .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static Map prepareAccumulators(SparkContext sc) { - Map accumulators = Maps.newHashMap(); + private static Map prepareAccumulators(final SparkContext sc) { + final Map accumulators = Maps.newHashMap(); accumulators .put( "resultResult_similarity_isAmongTopNSimilarDocuments", @@ -136,15 +124,13 @@ public class XmlConverterJob { sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments")); accumulators .put( - "resultResult_supplement_isSupplementTo", - sc.longAccumulator("resultResult_supplement_isSupplementTo")); + "resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo")); accumulators .put( "resultResult_supplement_isSupplementedBy", sc.longAccumulator("resultResult_supplement_isSupplementedBy")); accumulators - .put( - "resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); + .put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges")); accumulators @@ -152,16 +138,11 @@ public class XmlConverterJob { "resultResult_publicationDataset_isRelatedTo", sc.longAccumulator("resultResult_publicationDataset_isRelatedTo")); accumulators - .put( - "resultResult_relationship_isRelatedTo", - sc.longAccumulator("resultResult_relationship_isRelatedTo")); + .put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo")); accumulators - .put( - "resultProject_outcome_isProducedBy", - sc.longAccumulator("resultProject_outcome_isProducedBy")); + .put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy")); accumulators - .put( - "resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); + .put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "resultOrganization_affiliation_isAuthorInstitutionOf", @@ -184,9 +165,7 @@ public class XmlConverterJob { "organizationOrganization_dedup_isMergedIn", sc.longAccumulator("organizationOrganization_dedup_isMergedIn")); accumulators - .put( - "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + .put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..392d3cde6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -9,14 +10,23 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; -import javax.xml.transform.*; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -36,19 +46,38 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.*; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.MainEntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.ExternalReference; +import eu.dnetlib.dhp.schema.oaf.ExtraInfo; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { - private final Map accumulators; + /** + * + */ + private static final long serialVersionUID = 2912912999272373172L; - private final Set specialDatasourceTypes; + private final Map accumulators; private final ContextMapper contextMapper; @@ -61,23 +90,20 @@ public class XmlRecordFactory implements Serializable { public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { - this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI); + this(Maps.newHashMap(), contextMapper, indent, schemaLocation); } public XmlRecordFactory( final Map accumulators, final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { this.accumulators = accumulators; this.contextMapper = contextMapper; this.schemaLocation = schemaLocation; - this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI)); this.indent = indent; } @@ -87,8 +113,8 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); // final OafEntity entity = toOafEntity(je.getEntity()); - OafEntity entity = je.getEntity(); - TemplateFactory templateFactory = new TemplateFactory(); + final OafEntity entity = je.getEntity(); + final TemplateFactory templateFactory = new TemplateFactory(); try { final EntityType type = EntityType.fromClass(entity.getClass()); @@ -110,11 +136,7 @@ public class XmlRecordFactory implements Serializable { final String body = templateFactory .buildBody( - mainType, - metadata, - relations, - listChildren(entity, je, templateFactory), - listExtraInfo(entity)); + mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity)); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); } catch (final Throwable e) { @@ -142,19 +164,19 @@ public class XmlRecordFactory implements Serializable { default: throw new IllegalArgumentException("invalid type: " + type); } - } catch (IOException e) { + } catch (final IOException e) { throw new IllegalArgumentException(e); } } - private String printXML(String xml, boolean indent) { + private String printXML(final String xml, final boolean indent) { try { final Document doc = new SAXReader().read(new StringReader(xml)); - OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); + final OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); - StringWriter sw = new StringWriter(); - XMLWriter writer = new XMLWriter(sw, format); + final StringWriter sw = new StringWriter(); + final XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); } catch (IOException | DocumentException e) { @@ -163,7 +185,9 @@ public class XmlRecordFactory implements Serializable { } private List metadata( - final EntityType type, final OafEntity entity, final Set contexts) { + final EntityType type, + final OafEntity entity, + final Set contexts) { final List metadata = Lists.newArrayList(); @@ -230,72 +254,63 @@ public class XmlRecordFactory implements Serializable { .getAuthor() .stream() .filter(Objects::nonNull) - .map( - a -> { - final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) - && isNotBlank(sp.getValue())) - .collect( - Collectors - .toMap( - p -> getAuthorPidType(p.getQualifier().getClassid()), - p -> p, - (p1, p2) -> p1)) - .values() - .stream() - .collect( - Collectors - .groupingBy( - p -> p.getValue(), - Collectors - .mapping( - p -> p, - Collectors.minBy(new AuthorPidTypeComparator())))) - .values() - .stream() - .map(op -> op.get()) - .forEach( - sp -> { - String pidType = getAuthorPidType(sp.getQualifier().getClassid()); - String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - - // ugly hack: some records provide swapped pidtype and pidvalue - if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { - sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); - } else { - if (isNotBlank(pidType)) { - sb - .append( - String - .format( - " %s=\"%s\"", - pidType, - pidValue - .toLowerCase() - .replaceAll("^.*orcid\\.org\\/", ""))); - } - } - }); - } + .map(a -> { + final StringBuilder sb = new StringBuilder("" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); - return sb.toString(); - }) + .append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\""); + } + if (a.getPid() != null) { + a + .getPid() + .stream() + .filter(Objects::nonNull) + .filter( + sp -> isNotBlank(sp.getQualifier().getClassid()) + && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p, + (p1, p2) -> p1)) + .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), Collectors + .mapping( + p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) + .forEach(sp -> { + final String pidType = getAuthorPidType(sp.getQualifier().getClassid()); + final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); + + // ugly hack: some records provide swapped pidtype and pidvalue + if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { + sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); + } else { + if (isNotBlank(pidType)) { + sb + .append( + String + .format( + " %s=\"%s\"", pidType, pidValue + .toLowerCase() + .replaceAll("^.*orcid\\.org\\/", ""))); + } + } + }); + } + sb + .append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); + return sb.toString(); + }) .collect(Collectors.toList())); } if (r.getContributor() != null) { @@ -332,8 +347,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", r.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", r.getDateofacceptance().getValue())); } if (r.getDescription() != null) { metadata @@ -347,8 +361,7 @@ public class XmlRecordFactory implements Serializable { } if (r.getEmbargoenddate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); + .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); } if (r.getSubject() != null) { metadata @@ -423,23 +436,20 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "lastmetadataupdate", d.getLastmetadataupdate().getValue())); + .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue())); } if (d.getMetadataversionnumber() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "metadataversionnumber", d.getMetadataversionnumber().getValue())); + .asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue())); } if (d.getSize() != null) { metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue())); } if (d.getStoragedate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); + .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); } if (d.getVersion() != null) { metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue())); @@ -509,98 +519,87 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); + .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); } if (s.getProgrammingLanguage() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "programmingLanguage", s.getProgrammingLanguage())); + .mapQualifier("programmingLanguage", s.getProgrammingLanguage())); } break; case datasource: final Datasource ds = (Datasource) entity; if (ds.getDatasourcetype() != null) { - mapDatasourceType(metadata, ds.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype())); + } + if (ds.getDatasourcetypeui() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); } if (ds.getOpenairecompatibility() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", ds.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", ds.getOpenairecompatibility())); } if (ds.getOfficialname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); + .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); } if (ds.getEnglishname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); + .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); } if (ds.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); } if (ds.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue())); } if (ds.getContactemail() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); + .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); } if (ds.getNamespaceprefix() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "namespaceprefix", ds.getNamespaceprefix().getValue())); + .asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue())); } if (ds.getLatitude() != null) { metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue())); } if (ds.getLongitude() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); + .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); } if (ds.getDateofvalidation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofvalidation", ds.getDateofvalidation().getValue())); + .asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue())); } if (ds.getDescription() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); + .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); } if (ds.getOdnumberofitems() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitems", ds.getOdnumberofitems().getValue())); + .asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue())); } if (ds.getOdnumberofitemsdate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); + .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); } if (ds.getOdpolicies() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); + .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); } if (ds.getOdlanguages() != null) { metadata @@ -635,50 +634,43 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "releasestartdate", ds.getReleaseenddate().getValue())); + .asXmlElement("releasestartdate", ds.getReleaseenddate().getValue())); } if (ds.getReleaseenddate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "releaseenddate", ds.getReleaseenddate().getValue())); + .asXmlElement("releaseenddate", ds.getReleaseenddate().getValue())); } if (ds.getMissionstatementurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "missionstatementurl", ds.getMissionstatementurl().getValue())); + .asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue())); } if (ds.getDataprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dataprovider", ds.getDataprovider().getValue().toString())); + .asXmlElement("dataprovider", ds.getDataprovider().getValue().toString())); } if (ds.getServiceprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "serviceprovider", ds.getServiceprovider().getValue().toString())); + .asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString())); } if (ds.getDatabaseaccesstype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "databaseaccesstype", ds.getDatabaseaccesstype().getValue())); + .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue())); } if (ds.getDatauploadtype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadtype", ds.getDatauploadtype().getValue())); + .asXmlElement("datauploadtype", ds.getDatauploadtype().getValue())); } if (ds.getDatabaseaccessrestriction() != null) { metadata @@ -691,39 +683,33 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadrestriction", ds.getDatauploadrestriction().getValue())); + .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue())); } if (ds.getVersioning() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "versioning", ds.getVersioning().getValue().toString())); + .asXmlElement("versioning", ds.getVersioning().getValue().toString())); } if (ds.getCitationguidelineurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "citationguidelineurl", ds.getCitationguidelineurl().getValue())); + .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); } if (ds.getQualitymanagementkind() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "qualitymanagementkind", ds.getQualitymanagementkind().getValue())); + .asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue())); } if (ds.getPidsystems() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); + .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); } if (ds.getCertificates() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); + .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); } if (ds.getPolicies() != null) { metadata @@ -757,13 +743,11 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "legalshortname", o.getLegalshortname().getValue())); + .asXmlElement("legalshortname", o.getLegalshortname().getValue())); } if (o.getLegalname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); + .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); } if (o.getAlternativeNames() != null) { metadata @@ -777,8 +761,7 @@ public class XmlRecordFactory implements Serializable { } if (o.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); @@ -786,32 +769,27 @@ public class XmlRecordFactory implements Serializable { if (o.getEclegalbody() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); } if (o.getEclegalperson() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); } if (o.getEcnonprofit() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); } if (o.getEcresearchorganization() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecresearchorganization", o.getEcresearchorganization().getValue())); + .asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue())); } if (o.getEchighereducation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "echighereducation", o.getEchighereducation().getValue())); + .asXmlElement("echighereducation", o.getEchighereducation().getValue())); } if (o.getEcinternationalorganizationeurinterests() != null) { metadata @@ -830,20 +808,17 @@ public class XmlRecordFactory implements Serializable { } if (o.getEcenterprise() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); } if (o.getEcsmevalidated() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecsmevalidated", o.getEcsmevalidated().getValue())); + .asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue())); } if (o.getEcnutscode() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); } if (o.getCountry() != null) { metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry())); @@ -855,8 +830,7 @@ public class XmlRecordFactory implements Serializable { if (p.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); } if (p.getCode() != null) { metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue())); @@ -869,8 +843,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getStartdate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); + .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); } if (p.getEnddate() != null) { metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue())); @@ -879,8 +852,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "callidentifier", p.getCallidentifier().getValue())); + .asXmlElement("callidentifier", p.getCallidentifier().getValue())); } if (p.getKeywords() != null) { metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue())); @@ -890,8 +862,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getEcarticle29_3() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); } if (p.getSubjects() != null) { metadata @@ -923,13 +894,11 @@ public class XmlRecordFactory implements Serializable { } if (p.getTotalcost() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); + .add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); } if (p.getFundedamount() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); + .add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); } if (p.getFundingtree() != null) { metadata @@ -950,28 +919,18 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String getAuthorPidType(String s) { + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) .replaceAll("\\W", "") .replaceAll("\\d", ""); } - private static boolean kvNotBlank(KeyValue kv) { + private static boolean kvNotBlank(final KeyValue kv) { return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); } - private void mapDatasourceType(List metadata, final Qualifier dsType) { - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); - - if (specialDatasourceTypes.contains(dsType.getClassid())) { - dsType.setClassid("other"); - dsType.setClassname("other"); - } - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); - } - - private List mapFields(RelatedEntityWrapper link, Set contexts) { + private List mapFields(final RelatedEntityWrapper link, final Set contexts) { final Relation rel = link.getRelation(); final RelatedEntity re = link.getTarget(); final String targetType = link.getTarget().getType(); @@ -987,16 +946,14 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getDateofacceptance())) { metadata - .add( - XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); + .add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); } if (isNotBlank(re.getPublisher())) { metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); } if (isNotBlank(re.getCodeRepositoryUrl())) { metadata - .add( - XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); + .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); @@ -1026,14 +983,16 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { - mapDatasourceType(metadata, re.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype())); + } + if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); } if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", re.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", re.getOpenairecompatibility())); } break; case organization: @@ -1042,8 +1001,7 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getLegalshortname())) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); + .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); @@ -1085,8 +1043,10 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String mapRelation(Set contexts, TemplateFactory templateFactory, EntityType type, - RelatedEntityWrapper link) { + private String mapRelation(final Set contexts, + final TemplateFactory templateFactory, + final EntityType type, + final RelatedEntityWrapper link) { final Relation rel = link.getRelation(); final String targetType = link.getTarget().getType(); final String scheme = ModelSupport.getScheme(type.toString(), targetType); @@ -1096,8 +1056,9 @@ public class XmlRecordFactory implements Serializable { String.format("missing scheme for: <%s - %s>", type, targetType)); } final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); - if (rel.getValidated() == null) + if (rel.getValidated() == null) { rel.setValidated(false); + } return templateFactory .getRel( targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), @@ -1105,12 +1066,14 @@ public class XmlRecordFactory implements Serializable { } private List listChildren( - final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) { + final OafEntity entity, + final JoinedEntity je, + final TemplateFactory templateFactory) { final EntityType entityType = EntityType.fromClass(je.getEntity().getClass()); final List links = je.getLinks(); - List children = links + final List children = links .stream() .filter(link -> isDuplicate(link)) .map(link -> { @@ -1131,13 +1094,11 @@ public class XmlRecordFactory implements Serializable { if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); + .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields - .add( - XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); @@ -1147,20 +1108,17 @@ public class XmlRecordFactory implements Serializable { fields .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", instance.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); } if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); + .add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); } if (isNotBlank(instance.getDistributionlocation())) { fields .add( XmlSerializationUtils - .asXmlElement( - "distributionlocation", instance.getDistributionlocation())); + .asXmlElement("distributionlocation", instance.getDistributionlocation())); } if (instance.getPid() != null) { fields @@ -1185,8 +1143,7 @@ public class XmlRecordFactory implements Serializable { if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) { @@ -1208,8 +1165,7 @@ public class XmlRecordFactory implements Serializable { children .add( templateFactory - .getInstance( - instance.getHostedby().getKey(), fields, instance.getUrl())); + .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); } } final List ext = ((Result) entity).getExternalReference(); @@ -1254,11 +1210,11 @@ public class XmlRecordFactory implements Serializable { return children; } - private boolean isDuplicate(RelatedEntityWrapper link) { + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } - private List listExtraInfo(OafEntity entity) { + private List listExtraInfo(final OafEntity entity) { final List extraInfo = entity.getExtraInfo(); return extraInfo != null ? extraInfo @@ -1271,7 +1227,7 @@ public class XmlRecordFactory implements Serializable { private List buildContexts(final String type, final Set contexts) { final List res = Lists.newArrayList(); - if ((contextMapper != null) + if (contextMapper != null && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) { @@ -1302,8 +1258,7 @@ public class XmlRecordFactory implements Serializable { if (def.getName().equals("category")) { final String rootId = substringBefore(def.getId(), "::"); document = addContextDef( - document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), - def); + document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def); } if (def.getName().equals("concept")) { @@ -1327,17 +1282,17 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; - } catch (TransformerConfigurationException e) { + } catch (final TransformerConfigurationException e) { throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e); } } private XMLTag addContextDef(final XMLTag tag, final ContextDef def) { tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel()); - if ((def.getType() != null) && !def.getType().isEmpty()) { + if (def.getType() != null && !def.getType().isEmpty()) { tag.addAttribute("type", def.getType()); } return tag; @@ -1374,16 +1329,14 @@ public class XmlRecordFactory implements Serializable { if (level0 != null) { final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name")); contextMapper - .put( - level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); + .put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); final Node level1 = fundingPath.selectSingleNode("//funding_level_1"); if (level1 == null) { contexts.add(level0Id); } else { final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name")); contextMapper - .put( - level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); + .put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); final Node level2 = fundingPath.selectSingleNode("//funding_level_2"); if (level2 == null) { contexts.add(level1Id); @@ -1391,8 +1344,7 @@ public class XmlRecordFactory implements Serializable { final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name")); contextMapper .put( - level2Id, - new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); + level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); contexts.add(level2Id); } } @@ -1413,8 +1365,7 @@ public class XmlRecordFactory implements Serializable { funding += getFunderElement(ftree); for (final Object o : Lists - .reverse( - ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { + .reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { final Element e = (Element) o; final String _id = e.valueOf("./id"); funding += "<" diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index 32720514e..eda6154d7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -16,11 +16,5 @@ "paramLongName": "isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true - }, - { - "paramName": "odt", - "paramLongName": "otherDsTypeId", - "paramDescription": "list of datasource types to populate field datasourcetypeui", - "paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 9280678c1..9f41805e2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -25,10 +25,6 @@ targetMaxRelations maximum number of relations allowed for a each entity grouping by target - - otherDsTypeId - mapping used to populate datasourceTypeUi field - format metadata format name (DMF|TMF) @@ -582,7 +578,6 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml --isLookupUrl${isLookupUrl} - --otherDsTypeId${otherDsTypeId} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..cd07cfcb1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +15,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -49,7 +46,7 @@ public class IndexRecordTransformerTest { @Test public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); testRecordTransformation(record); } @@ -57,14 +54,14 @@ public class IndexRecordTransformerTest { @Test public void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - XmlRecordFactoryTest.otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = load("publication.json", Publication.class); - Project pj = load("project.json", Project.class); - Relation rel = load("relToValidatedProject.json", Relation.class); + final Publication p = load("publication.json", Publication.class); + final Project pj = load("project.json", Project.class); + final Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -72,24 +69,25 @@ public class IndexRecordTransformerTest { new RelatedEntityWrapper(rel, CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class)))); - String record = xmlRecordFactory.build(je); + final String record = xmlRecordFactory.build(je); assertNotNull(record); testRecordTransformation(record); } - private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); - String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); - Transformer tr = SaxonTransformerFactory.newInstance(transformer); + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); - String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); @@ -97,7 +95,7 @@ public class IndexRecordTransformerTest { System.out.println(xmlDoc); } - private T load(String fileName, Class clazz) throws IOException { + private T load(final String fileName, final Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da..2b5e08e92 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -29,27 +30,25 @@ import eu.dnetlib.dhp.schema.oaf.Relation; public class XmlRecordFactoryTest { - public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; - public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test public void testXMLRecordFactory() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); @@ -72,30 +71,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue( - (IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); @@ -104,29 +102,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); diff --git a/pom.xml b/pom.xml index 6e4526e41..1a3523f2b 100644 --- a/pom.xml +++ b/pom.xml @@ -736,7 +736,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.7.15-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 0424f47494ffcc792fc4d9c7dc4e79407dc105c5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 10:24:13 +0200 Subject: [PATCH 078/287] HostedByMap fixing issues --- .../dhp/oa/graph/hostebymap/Aggregators.scala | 2 +- .../SparkPrepareHostedByMapData.scala | 240 +++++++++++------- .../oa/graph/hostedbymap/TestPreprocess.scala | 85 ++++++- .../dhp/oa/graph/hostedbymap/datasource.json | 20 +- 4 files changed, 234 insertions(+), 113 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala index d98418339..561d2bbf4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala @@ -5,7 +5,7 @@ import org.apache.spark.sql.expressions.Aggregator case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} - +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} object Aggregators { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala index 55b6e36cd..b66e97281 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala @@ -2,27 +2,23 @@ package eu.dnetlib.dhp.oa.graph.hostebymap import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.oa.merge.AuthorMerger -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.{Datasource, Organization, Publication, Relation} +import eu.dnetlib.dhp.schema.oaf.{Datasource} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} -import org.json4s.jackson.Serialization.write -import scala.collection.mutable.ListBuffer +import com.fasterxml.jackson.databind.ObjectMapper object SparkPrepareHostedByMapData { - case class HostedByInfo(id: Option[String], officialname: String, journal_id: String, provenance : String, id_type: String) {} implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] - implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] - implicit val mapEncoderHBI: Encoder[HostedByInfo] = Encoders.product[HostedByInfo] + + + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { @@ -32,59 +28,144 @@ object SparkPrepareHostedByMapData { val isOpenAccess: Boolean = doaj == null && gold == null openaire.journal_id match { - case Constants.ISSN => return HostedByItemType(openaire.id.get, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) - case Constants.EISSN => return HostedByItemType(openaire.id.get, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) - case Constants.ISSNL => return HostedByItemType(openaire.id.get, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) // catch the default with a variable so you can print it - case whoa => return null + case whoa => null } } - def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { - implicit val formats = DefaultFormats - val serializedJSON:String = write(input) +// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { +// implicit val formats = DefaultFormats +// val serializedJSON:String = write(input) +// +// var hostedBy = new ListBuffer[String]() +// if(!input.issn.equals("")){ +// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" +// } +// if(!input.eissn.equals("")){ +// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" +// } +// if(!input.lissn.equals("")){ +// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" +// } +// +// hostedBy +// +// } - var hostedBy = new ListBuffer[String]() + def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { + if(issn != null){ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, issn, eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, eissn, "" , oa) + } + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, issn, "", issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, "", "" , oa) + } + } + }else{ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, "", eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, "", eissn, "" , oa) + } + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, "", "", issnl , oa) + }else{ + HostedByItemType("", "", "", "", "" , oa) + } + } + } + } + + def oaToHostedbyItemType(dats: Datasource): HostedByItemType = { + if (dats.getJournal != null) { + + return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) + } + HostedByItemType("","","","","",false) + } + + def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { + + import spark.implicits._ + + + val mapper = new ObjectMapper() + + implicit var encoderD = Encoders.kryo[Datasource] + + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + + def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { + return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssn_l, true) + } + + + def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + + val mapper = new ObjectMapper() + + val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) + + dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { + + return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) + } + + def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + + val mapper = new ObjectMapper() + + val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[DOAJModel])) + + dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { + var lst : List[(String, HostedByItemType)] = List() if(!input.issn.equals("")){ - hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" + lst = (input.issn, input) :: lst } if(!input.eissn.equals("")){ - hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" + lst = (input.eissn, input) :: lst } if(!input.lissn.equals("")){ - hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" + lst = (input.lissn, input) :: lst } - - hostedBy - + lst } - def readOADataset(input:String, spark: SparkSession): Dataset[HostedByInfo] = { - spark.read.textFile(input).as[Datasource].flatMap(ds => { - val lst = new ListBuffer[HostedByInfo]() - if (ds.getJournal == null) { - return null - } - val issn: String = ds.getJournal.getIssnPrinted - val issnl: String = ds.getJournal.getIssnOnline - val eissn: String = ds.getJournal.getIssnOnline - val id: String = ds.getId - val officialname: String = ds.getOfficialname.getValue - if (issn != null) { - lst += HostedByInfo(Some(id), officialname, issn, Constants.OPENAIRE, Constants.ISSN) - } - if (issnl != null) { - lst += HostedByInfo(Some(id), officialname, issnl, Constants.OPENAIRE, Constants.ISSNL) - } - if (eissn != null) { - lst += HostedByInfo(Some(id), officialname, eissn, Constants.OPENAIRE, Constants.EISSN) - } - lst - }).filter(i => i != null) - } - def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -105,53 +186,34 @@ object SparkPrepareHostedByMapData { + implicit val formats = DefaultFormats + logger.info("Getting the Datasources") - val doajDataset: Dataset[DOAJModel] = spark.read.load(workingDirPath + "/doaj").as[DOAJModel] - val unibiDataset: Dataset[UnibiGoldModel] = spark.read.load(datasourcePath).as[UnibiGoldModel] + // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel] - val oa: Dataset[HostedByInfo] = readOADataset(datasourcePath, spark) - - val doaj: Dataset[HostedByInfo] = doajDataset.flatMap(doaj => { - val lst = new ListBuffer[HostedByInfo]() - val issn: String = doaj.getIssn - val eissn: String = doaj.getEissn - val officialname: String = doaj.getJournalTitle - if (issn != null) { - lst += HostedByInfo(null, officialname, issn, Constants.DOAJ, Constants.ISSN) - } - if (eissn != null) { - lst += HostedByInfo(null, officialname, eissn, Constants.DOAJ, Constants.EISSN) - } - lst - }) - - val gold: Dataset[HostedByInfo] = unibiDataset.flatMap(gold => { - val lst = new ListBuffer[HostedByInfo]() - val issn: String = gold.getIssn - val issnl: String = gold.getIssn_l - val officialname: String = gold.getTitle - if (issn != null) { - lst += HostedByInfo(null, officialname, issn, Constants.UNIBI, Constants.ISSN) - } - if (issnl != null) { - lst += HostedByInfo(null, officialname, issnl, Constants.UNIBI, Constants.ISSNL) - } - lst - }) - - Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") - .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) - .filter(i => i != null)) - .flatMap(toHostedByMap) - // .map(i => (i.id,i)) - // .groupByKey(_._1) - // .agg(hostedByAggregator.toColumn) - // .map(p => p._2) - .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") + val dats : Dataset[HostedByItemType] = + oaHostedByDataset(spark, datasourcePath) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) + dats.flatMap(hbi => toList(hbi)) + .groupByKey(_._1) +// +// + +// + +// +// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") +// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) +// .filter(i => i != null)) +// .flatMap(toHostedByMap) +// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") +// +// } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 657f20fce..8e5657bfc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -2,22 +2,23 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import java.sql.Timestamp -import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData -import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData.HostedByInfo +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, SparkSession} -import org.codehaus.jackson.map.ObjectMapper +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test import org.slf4j.{Logger, LoggerFactory} +import scala.collection.mutable.ListBuffer import scala.io.Source -class TestPreprocess { +class TestPreprocess extends java.io.Serializable{ - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource] + implicit val schema = Encoders.product[HostedByInfo] @@ -27,7 +28,11 @@ class TestPreprocess { import org.apache.spark.sql.Encoders implicit val formats = DefaultFormats - import org.json4s.jackson.Serialization.write + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + val conf = new SparkConf() conf.setMaster("local[*]") @@ -41,12 +46,67 @@ class TestPreprocess { val path = getClass.getResource("datasource.json").getPath - val schema = Encoders.product[HostedByInfo] + println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count) - spark.read.textFile(path).foreach(r => println(mapper.writeValueAsString(r))) -// SparkPrepareHostedByMapData.readOADataset(path, spark) -// .foreach(r => println(write(r))) + + spark.close() + } + + + @Test + def readGold():Unit = { + + implicit val formats = DefaultFormats + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("unibi_transformed.json").getPath + + + println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count) + + + + spark.close() + } + + @Test + def readDoaj():Unit = { + + implicit val formats = DefaultFormats + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("doaj_transformed.json").getPath + + + println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count) + spark.close() @@ -55,5 +115,4 @@ class TestPreprocess { - } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 15e1dcc45..818aaa716 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,10 +1,10 @@ -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.energyret.ru/jour/"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file From 16c91203bd434664fdb8b8a3633fb683880039c5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 28 Jul 2021 10:30:49 +0200 Subject: [PATCH 079/287] implemented workflow of creation action set for scholexplorer --- .../datacite/AbstractRestClient.scala | 13 +-- .../datacite/ImportDatacite.scala | 2 +- .../sx/provision/SparkCreateActionset.scala | 90 +++++++++++++++++++ .../dhp/sx/provision/SparkSaveActionSet.scala | 86 ++++++++++++++++++ .../dhp/sx/actionset/generate_actionset.json | 6 ++ .../sx/actionset/oozie_app/config-default.xml | 23 +++++ .../dhp/sx/actionset/oozie_app/workflow.xml | 76 ++++++++++++++++ .../dhp/sx/actionset/save_actionset.json | 5 ++ 8 files changed, 295 insertions(+), 6 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 8df203283..823187afe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.actionmanager.datacite import org.apache.commons.io.IOUtils +import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest} import org.apache.http.entity.StringEntity -import org.apache.http.impl.client.HttpClients +import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} import java.io.IOException @@ -56,11 +57,15 @@ abstract class AbstractRestClient extends Iterator[String]{ private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={ - val client = HttpClients.createDefault + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() var tries = 4 try { while (tries > 0) { - println(s"requesting ${r.getURI}") val response = client.execute(r) println(s"get response with status${response.getStatusLine.getStatusCode}") @@ -80,7 +85,5 @@ abstract class AbstractRestClient extends Iterator[String]{ throw new RuntimeException("Unable to close client ", e) } } - getBufferData() - } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala index 931ac06f6..2b73d2955 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala @@ -140,7 +140,7 @@ object ImportDatacite { private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = { var from:Long = timestamp * 1000 - val delta:Long = 50000000L + val delta:Long = 100000000L var client: DataciteAPIImporter = null val now :Long =System.currentTimeMillis() var i = 0 diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala new file mode 100644 index 000000000..6f0cdcf8a --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala @@ -0,0 +1,90 @@ +package eu.dnetlib.dhp.sx.provision + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} +import org.apache.spark.{SparkConf, sql} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +object SparkCreateActionset { + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString) + parser.parseArgument(args) + + + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val workingDirFolder = parser.get("workingDirFolder") + log.info(s"workingDirFolder -> $workingDirFolder") + + implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoders:Encoder[Result] = Encoders.kryo[Result] + implicit val relationEncoders:Encoder[Relation] = Encoders.kryo[Relation] + + import spark.implicits._ + + val relation = spark.read.load(s"$sourcePath/relation").as[Relation] + + relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.save(s"$workingDirFolder/id_relation") + + + val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] + + log.info("extract source and target Identifier involved in relations") + + + log.info("save relation filtered") + + relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving publication") + + val publication:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/publication").as[Result].map(p => (p.getId, p)) + + publication + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving dataset") + val dataset:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/dataset").as[Result].map(p => (p.getId, p)) + dataset + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving software") + val software:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/software").as[Result].map(p => (p.getId, p)) + software + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + + log.info("saving Other Research product") + val orp:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/otherresearchproduct").as[Result].map(p => (p.getId, p)) + orp + .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + .map(p => p._1._2) + .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala new file mode 100644 index 000000000..d1d0b8424 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala @@ -0,0 +1,86 @@ +package eu.dnetlib.dhp.sx.provision + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.action.AtomicAction +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import org.apache.hadoop.io.Text +import org.apache.hadoop.io.compress.GzipCodec +import org.apache.hadoop.mapred.SequenceFileOutputFormat +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +object SparkSaveActionSet { + + + def toActionSet(item: Oaf): (String, String) = { + val mapper = new ObjectMapper() + + item match { + case dataset: OafDataset => + val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset] + a.setClazz(classOf[OafDataset]) + a.setPayload(dataset) + (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case publication: Publication => + val a: AtomicAction[Publication] = new AtomicAction[Publication] + a.setClazz(classOf[Publication]) + a.setPayload(publication) + (publication.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case software: Software => + val a: AtomicAction[Software] = new AtomicAction[Software] + a.setClazz(classOf[Software]) + a.setPayload(software) + (software.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case orp: OtherResearchProduct => + val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct] + a.setClazz(classOf[OtherResearchProduct]) + a.setPayload(orp) + (orp.getClass.getCanonicalName, mapper.writeValueAsString(a)) + + case relation: Relation => + val a: AtomicAction[Relation] = new AtomicAction[Relation] + a.setClazz(classOf[Relation]) + a.setPayload(relation) + (relation.getClass.getCanonicalName, mapper.writeValueAsString(a)) + case _ => + null + } + + } + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString) + parser.parseArgument(args) + + + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING) + + spark.read.load(sourcePath).as[Oaf] + .map(o =>toActionSet(o)) + .filter(o => o!= null) + .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) + + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json new file mode 100644 index 000000000..0563808ea --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json @@ -0,0 +1,6 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "source path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingDirFolder","paramDescription": "the working Dir Folder", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the target path ", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml new file mode 100644 index 000000000..dd3c32c62 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml @@ -0,0 +1,23 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml new file mode 100644 index 000000000..ef86a1772 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -0,0 +1,76 @@ + + + + sourcePath + the path of the consistent graph + + + workingDirFolder + the path of working dir ActionSet + + + outputPath + the path of Scholexplorer ActionSet + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + Create Action Set + eu.dnetlib.dhp.sx.provision.SparkCreateActionset + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --targetPath${outputPath} + --workingDirFolder${workingDirFolder} + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Save Action Set + eu.dnetlib.dhp.sx.provision.SparkSaveActionSet + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${workingDirFolder}/actionSetOaf + --targetPath${outputPath} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json new file mode 100644 index 000000000..0264c825f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "source path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the target path ", "paramRequired": true} +] \ No newline at end of file From 1fb572a33ae1fa374d3237aeefb977de8de6685e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 10:52:24 +0200 Subject: [PATCH 080/287] added eosc fields --- .../dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f0a4161ab..f58677101 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -84,13 +84,18 @@ SELECT dc.id AS collectedfromid, dc.officialname AS collectedfromname, d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, + d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, - d.lissn AS issnLinking + d.lissn AS issnLinking, + de.jurisdiction AS jurisdiction, + de.thematic AS thematic, + de.knowledge_graph AS knowledgegraph, + de.content_policies AS contentpolicies FROM dsm_datasources d - +LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id) LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource) @@ -126,4 +131,8 @@ GROUP BY dc.officialname, d.issn, d.eissn, - d.lissn + d.lissn, + de.jurisdiction, + de.thematic, + de.knowledge_graph, + de.content_policies From c72c960ffb61bcd7db272d387d34fa4d044e25c3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:03:15 +0200 Subject: [PATCH 081/287] added eosc fields --- .../eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f58677101..98092e882 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -89,10 +89,10 @@ SELECT d.issn AS issnPrinted, d.eissn AS issnOnline, d.lissn AS issnLinking, - de.jurisdiction AS jurisdiction, + de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, de.thematic AS thematic, de.knowledge_graph AS knowledgegraph, - de.content_policies AS contentpolicies + array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies FROM dsm_datasources d LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) From 43e62fcae92b9c955201d0b2f493d22cc8b32744 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:04:55 +0200 Subject: [PATCH 082/287] DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4193 --- .../dnetlib/doiboost/DoiBoostMappingUtil.scala | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f1..d018948fc 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -179,20 +179,6 @@ object DoiBoostMappingUtil { } - //val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - - - - // val pub_date = LocalDate.parse(date, formatter) - -// if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ -// val oaq : AccessRight = getOpenAccessQualifier() -// oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) -// return oaq -// } -// else{ -// return getEmbargoedAccessQualifier() -// } } return getClosedAccessQualifier() @@ -206,12 +192,12 @@ object DoiBoostMappingUtil { } def getRestrictedQualifier():AccessRight = { - OafMapperUtils.accessRight("RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight( "RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } def getUnknownQualifier():AccessRight = { - OafMapperUtils.accessRight("UNKNOWN","not available",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } From 5fe016dcbccda43276342efd1547276d723ff5db Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:14:28 +0200 Subject: [PATCH 083/287] DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4194 --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index d018948fc..ea65fc747 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -188,7 +188,8 @@ object DoiBoostMappingUtil { def getOpenAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("OPEN","Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + + OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN,"Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } def getRestrictedQualifier():AccessRight = { From 80d5b3b4deb5098ba85dd701a35cfecf82b28ef9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:16:49 +0200 Subject: [PATCH 084/287] DoiBoost AccessRigh #4362 - removing commented code --- .../eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index ea65fc747..e68880433 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -236,8 +236,7 @@ object DoiBoostMappingUtil { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) } -// val ar = getOpenAccessQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) + } else { hb = ModelConstants.UNKNOWN_REPOSITORY @@ -246,17 +245,7 @@ object DoiBoostMappingUtil { }) publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) -// val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) -// if (ar.nonEmpty) { -// if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ -// val ar = getOpenAccessQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) -// } -// else { -// val ar = getRestrictedQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) -// } -// } + publication } From e6f1773d637cc02ca3424d1c76658156cfb78399 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:17:11 +0200 Subject: [PATCH 085/287] mapping of new eosc fields --- .../raw/MigrateDbEntitiesApplication.java | 97 ++++++++++++++----- 1 file changed, 74 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe..d78732f9b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -1,8 +1,41 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.Closeable; import java.io.IOException; @@ -53,8 +86,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); private static final DataInfo DATA_INFO_CLAIM = dataInfo( - false, null, false, false, - qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); + false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + "0.9"); private static final List COLLECTED_FROM_CLAIM = listKeyValues( createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); @@ -140,8 +173,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs_dedup: // generates organization entities and relations for openorgs dedup log.info("Processing Openorgs..."); smdbe - .execute( - "queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Sim Rels..."); smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels); @@ -150,8 +182,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs: // generates organization entities and relations for provision log.info("Processing Openorgs For Provision..."); smdbe - .execute( - "queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Merge Rels..."); smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels); @@ -229,6 +260,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setOaiprovenance(null); // Values not present in the DB ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); + ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setEnglishname(field(rs.getString("englishname"), info)); @@ -270,6 +302,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setDataInfo(info); ds.setLastupdatetimestamp(lastUpdateTimestamp); + ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); + ds.setThematic(rs.getBoolean("thematic")); + ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); + ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); + return Arrays.asList(ds); } catch (final Exception e) { throw new RuntimeException(e); @@ -495,8 +532,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i throw new IllegalStateException( String .format( - "invalid claim, sourceId: %s, targetId: %s, semantics: %s", - sourceId, targetId, semantics)); + "invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId, + semantics)); } r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES); r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY); @@ -516,8 +553,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - private Relation prepareRelation(String sourceId, String targetId, String validationDate) { - Relation r = new Relation(); + private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) { + final Relation r = new Relation(); if (StringUtils.isNotBlank(validationDate)) { r.setValidated(true); r.setValidationDate(validationDate); @@ -530,7 +567,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return r; } - private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) { + private Relation setRelationSemantic(final Relation r, final String relType, final String subRelType, + final String relClass) { r.setRelType(relType); r.setSubRelType(subRelType); r.setRelClass(relClass); @@ -603,6 +641,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return res; } + private List prepareListOfQualifiers(final Array array) throws SQLException { + final List res = new ArrayList<>(); + if (array != null) { + for (final String s : (String[]) array.getArray()) { + final Qualifier q = prepareQualifierSplitting(s); + if (q != null) { + res.add(q); + } + } + } + return res; + } + public List processOrgOrgMergeRels(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); // TODO @@ -660,16 +711,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setLastupdatetimestamp(lastUpdateTimestamp); // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); + // final Relation r2 = new Relation(); + // r2.setRelType(ORG_ORG_RELTYPE); + // r2.setSubRelType(ORG_ORG_SUBRELTYPE); + // r2.setRelClass(relClass); + // r2.setSource(orgId2); + // r2.setTarget(orgId1); + // r2.setCollectedfrom(collectedFrom); + // r2.setDataInfo(info); + // r2.setLastupdatetimestamp(lastUpdateTimestamp); + // return Arrays.asList(r1, r2); return Arrays.asList(r1); } catch (final Exception e) { From 3d2bba3d5d6f346d3fed3fc80511e9bed78fa92c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:25:43 +0200 Subject: [PATCH 086/287] removing not needed classes --- .../main/java/eu/dnetlib/doiboost/GetCSV.java | 37 ----- .../eu/dnetlib/doiboost/UnibiGoldModel.java | 151 ------------------ 2 files changed, 188 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java deleted file mode 100644 index 00b6b184b..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java +++ /dev/null @@ -1,37 +0,0 @@ - -package eu.dnetlib.doiboost; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) { - - log.info("Getting CSV file..."); - readCSV.execute(classForName); - - } - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java deleted file mode 100644 index e5bd49ada..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java +++ /dev/null @@ -1,151 +0,0 @@ - -package eu.dnetlib.doiboost; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - private String ISSN; - private String ISSN_L; - private String ISSN_IN_DOAJ; - private String ISSN_IN_ROAD; - private String ISSN_IN_PMC; - private String ISSN_IN_OAPC; - private String ISSN_IN_WOS; - private String ISSN_IN_SCOPUS; - private String JOURNAL_IN_DOAJ; - private String JOURNAL_IN_ROAD; - private String JOURNAL_IN_PMC; - private String JOURNAL_IN_OAPC; - private String JOURNAL_IN_WOS; - private String JOURNAL_IN_SCOPUS; - private String TITLE; - private String TITLE_SOURCE; - - public String getISSN() { - return ISSN; - } - - public void setISSN(String ISSN) { - this.ISSN = ISSN; - } - - public String getISSN_L() { - return ISSN_L; - } - - public void setISSN_L(String ISSN_L) { - this.ISSN_L = ISSN_L; - } - - public String getISSN_IN_DOAJ() { - return ISSN_IN_DOAJ; - } - - public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) { - this.ISSN_IN_DOAJ = ISSN_IN_DOAJ; - } - - public String getISSN_IN_ROAD() { - return ISSN_IN_ROAD; - } - - public void setISSN_IN_ROAD(String ISSN_IN_ROAD) { - this.ISSN_IN_ROAD = ISSN_IN_ROAD; - } - - public String getISSN_IN_PMC() { - return ISSN_IN_PMC; - } - - public void setISSN_IN_PMC(String ISSN_IN_PMC) { - this.ISSN_IN_PMC = ISSN_IN_PMC; - } - - public String getISSN_IN_OAPC() { - return ISSN_IN_OAPC; - } - - public void setISSN_IN_OAPC(String ISSN_IN_OAPC) { - this.ISSN_IN_OAPC = ISSN_IN_OAPC; - } - - public String getISSN_IN_WOS() { - return ISSN_IN_WOS; - } - - public void setISSN_IN_WOS(String ISSN_IN_WOS) { - this.ISSN_IN_WOS = ISSN_IN_WOS; - } - - public String getISSN_IN_SCOPUS() { - return ISSN_IN_SCOPUS; - } - - public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) { - this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS; - } - - public String getJOURNAL_IN_DOAJ() { - return JOURNAL_IN_DOAJ; - } - - public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) { - this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ; - } - - public String getJOURNAL_IN_ROAD() { - return JOURNAL_IN_ROAD; - } - - public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) { - this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD; - } - - public String getJOURNAL_IN_PMC() { - return JOURNAL_IN_PMC; - } - - public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) { - this.JOURNAL_IN_PMC = JOURNAL_IN_PMC; - } - - public String getJOURNAL_IN_OAPC() { - return JOURNAL_IN_OAPC; - } - - public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) { - this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC; - } - - public String getJOURNAL_IN_WOS() { - return JOURNAL_IN_WOS; - } - - public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) { - this.JOURNAL_IN_WOS = JOURNAL_IN_WOS; - } - - public String getJOURNAL_IN_SCOPUS() { - return JOURNAL_IN_SCOPUS; - } - - public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) { - this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS; - } - - public String getTITLE() { - return TITLE; - } - - public void setTITLE(String TITLE) { - this.TITLE = TITLE; - } - - public String getTITLE_SOURCE() { - return TITLE_SOURCE; - } - - public void setTITLE_SOURCE(String TITLE_SOURCE) { - this.TITLE_SOURCE = TITLE_SOURCE; - } -} From 9f1c7b8e177f479289fda28f933de5bab04fb7ce Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:32:34 +0200 Subject: [PATCH 087/287] tests --- .../raw/MigrateDbEntitiesApplicationTest.java | 192 ++++++++++-------- .../raw/datasources_resultset_entry.json | 27 +++ 2 files changed, 132 insertions(+), 87 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb2..2078735f3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -28,7 +28,12 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) @@ -46,11 +51,8 @@ public class MigrateDbEntitiesApplicationTest { public void setUp() { lenient() .when(vocs.getTermAsQualifier(anyString(), anyString())) - .thenAnswer( - invocation -> OafMapperUtils - .qualifier( - invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), - invocation.getArgument(0))); + .thenAnswer(invocation -> OafMapperUtils + .qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0))); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); @@ -78,6 +80,23 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetype().getClassid()); + assertEquals("dnet:datasource_typologies", ds.getDatasourcetype().getSchemeid()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid()); + assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid()); + + assertEquals("National", ds.getJurisdiction().getClassid()); + assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); + + assertTrue(ds.getThematic()); + assertTrue(ds.getKnowledgegraph()); + + assertEquals(1, ds.getContentpolicies().size()); + assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); + assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); + } @Test @@ -119,7 +138,7 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid()); assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename()); assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue()); - List alternativenames = getValueAsList("alternativenames", fields); + final List alternativenames = getValueAsList("alternativenames", fields); assertEquals(2, alternativenames.size()); assertTrue(alternativenames.contains("Pippo")); assertTrue(alternativenames.contains("Foo")); @@ -216,73 +235,72 @@ public class MigrateDbEntitiesApplicationTest { private List prepareMocks(final String jsonFile) throws IOException, SQLException { final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile)); final ObjectMapper mapper = new ObjectMapper(); - final List list = mapper.readValue(json, new TypeReference>() { - }); + final List list = mapper.readValue(json, new TypeReference>() {}); for (final TypedField tf : list) { if (tf.getValue() == null) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); - break; - case "date": - Mockito.when(rs.getDate(tf.getField())).thenReturn(null); - break; - case "int": - Mockito.when(rs.getInt(tf.getField())).thenReturn(0); - break; - case "double": - Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); - break; - case "array": - Mockito.when(rs.getArray(tf.getField())).thenReturn(null); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(null); - break; + case "not_used": + break; + case "boolean": + Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); + break; + case "date": + Mockito.when(rs.getDate(tf.getField())).thenReturn(null); + break; + case "int": + Mockito.when(rs.getInt(tf.getField())).thenReturn(0); + break; + case "double": + Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); + break; + case "array": + Mockito.when(rs.getArray(tf.getField())).thenReturn(null); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(null); + break; } } else { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito - .when(rs.getBoolean(tf.getField())) - .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); - break; - case "date": - Mockito - .when(rs.getDate(tf.getField())) - .thenReturn(Date.valueOf(tf.getValue().toString())); - break; - case "int": - Mockito - .when(rs.getInt(tf.getField())) - .thenReturn(new Integer(tf.getValue().toString())); - break; - case "double": - Mockito - .when(rs.getDouble(tf.getField())) - .thenReturn(new Double(tf.getValue().toString())); - break; - case "array": - final Array arr = Mockito.mock(Array.class); - final String[] values = ((List) tf.getValue()) - .stream() - .filter(Objects::nonNull) - .map(o -> o.toString()) - .toArray(String[]::new); + case "not_used": + break; + case "boolean": + Mockito + .when(rs.getBoolean(tf.getField())) + .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); + break; + case "date": + Mockito + .when(rs.getDate(tf.getField())) + .thenReturn(Date.valueOf(tf.getValue().toString())); + break; + case "int": + Mockito + .when(rs.getInt(tf.getField())) + .thenReturn(new Integer(tf.getValue().toString())); + break; + case "double": + Mockito + .when(rs.getDouble(tf.getField())) + .thenReturn(new Double(tf.getValue().toString())); + break; + case "array": + final Array arr = Mockito.mock(Array.class); + final String[] values = ((List) tf.getValue()) + .stream() + .filter(Objects::nonNull) + .map(o -> o.toString()) + .toArray(String[]::new); - Mockito.when(arr.getArray()).thenReturn(values); - Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); - break; + Mockito.when(arr.getArray()).thenReturn(values); + Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); + break; } } } @@ -294,27 +312,27 @@ public class MigrateDbEntitiesApplicationTest { for (final TypedField tf : list) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); - break; - case "date": - Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); - break; - case "int": - Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); - break; - case "double": - Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); - break; - case "array": - Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); - break; - case "string": - default: - Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); - break; + case "not_used": + break; + case "boolean": + Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); + break; + case "date": + Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); + break; + case "int": + Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); + break; + case "double": + Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); + break; + case "array": + Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); + break; + case "string": + default: + Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); + break; } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index befa722e1..42b140306 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -222,6 +222,11 @@ "type": "string", "value": "pubsrepository::journal@@@dnet:datasource_typologies" }, + { + "field": "datasourcetypeui", + "type": "string", + "value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" + }, { "field": "provenanceaction", "type": "not_used", @@ -241,5 +246,27 @@ "field": "issnLinking", "type": "string", "value": "2579-5447" + }, + { + "field": "jurisdiction", + "type": "string", + "value": "National@@@eosc:jurisdictions" + }, + { + "field": "thematic", + "type": "boolean", + "value": true + }, + { + "field": "knowledgegraph", + "type": "boolean", + "value": true + }, + { + "field": "contentpolicies", + "type": "array", + "value": [ + "Journal article@@@eosc:contentpolicies" + ] } ] From 2fff24df55f2bafd2b0f67837d2f266dcf934fa8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 11:34:19 +0200 Subject: [PATCH 088/287] code formatting --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c..63f18a803 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..2c8240290 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { From 9594343725ca1bf512da19aff1c2bdd4e98c6557 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 28 Jul 2021 11:41:34 +0200 Subject: [PATCH 089/287] code formatting after mvn compile --- .../dhp/oa/provision/utils/XmlRecordFactory.java | 2 +- .../dhp/oa/provision/XmlRecordFactoryTest.java | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..2c8240290 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index a5a1563aa..897e507e1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,8 +7,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; -import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -25,6 +23,7 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -137,17 +136,18 @@ public class XmlRecordFactoryTest { @Test public void testEnermapsRecord() throws IOException, DocumentException { - String contextmap = "" + - ""+ - ""+ - ""; + String contextmap = "" + + + "" + + "" + + ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); Dataset d = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); JoinedEntity je = new JoinedEntity<>(d); From c806387d4bfa74491375afbc80c39d67547ce9f6 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 20 Jul 2021 19:31:43 +0200 Subject: [PATCH 090/287] tests for enermaps --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 25 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/enermaps.xml | 72 +++++++++++++++++++ .../oa/provision/XmlRecordFactoryTest.java | 29 ++++++++ .../eu/dnetlib/dhp/oa/provision/enermaps.json | 1 + 4 files changed, 127 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 63f18a803..fb4a5b5da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -559,6 +559,31 @@ public class MappersTest { assertNotNull(d.getInstance().get(0).getUrl()); } + @Test + void testEnermaps() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + + final Dataset d = (Dataset) list.get(0); + + assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); + assertEquals(1, d.getAuthor().size()); + assertEquals(1, d.getInstance().size()); + assertNotNull(d.getInstance().get(0).getUrl()); + assertNotNull(d.getContext()); + assertTrue(StringUtils.isNotBlank(d.getContext().get(0).getId())); + assertEquals("enermaps::selection::tgs00004", d.getContext().get(0).getId()); + } + @Test void testClaimFromCrossref() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml new file mode 100644 index 000000000..362b40c85 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml @@ -0,0 +1,72 @@ + + + + enermaps____::04149ee428d07360314c2cb3ba95d41e + tgs00004 + 2021-07-20T18:43:12.096+02:00 + enermaps____ + + + + https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004 + + + Statistical Office of the European Union (Eurostat) + + + + + Regional GDP + + + Statistical Office of the European Union (Eurostat) + 2020 + + 2020-10-07 + + + + OPEN + Creative Commons Attribution 4.0 International + + + GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy. + + 0021 + 2020-10-07 + OPEN + Creative Commons Attribution 4.0 International + + + + + + + + + https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da..a5a1563aa 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,6 +7,8 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; +import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -131,4 +133,31 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); } + + @Test + public void testEnermapsRecord() throws IOException, DocumentException { + + String contextmap = "" + + ""+ + ""+ + ""; + + ContextMapper contextMapper = ContextMapper.fromXml(contextmap); + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); + + Dataset d = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + + JoinedEntity je = new JoinedEntity<>(d); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); + assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id")); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json new file mode 100644 index 000000000..dcd4c2ee1 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626800904248,"id":"50|enermaps____::04149ee428d07360314c2cb3ba95d41e","originalId":["50|enermaps____::04149ee428d07360314c2cb3ba95d41e","tgs00004"],"pid":[],"dateofcollection":"2021-07-20T18:43:12.096+02:00","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-07-20T18:43:12.096+02:00","altered":true,"baseURL":"https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite","identifier":"","datestamp":"","metadataNamespace":""}},"measures":null,"author":[{"fullname":"Statistical Office of the European Union (Eurostat)","name":"","surname":"","rank":1,"pid":[],"affiliation":[]}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"\n Regional GDP\n ","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2020-10-07","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Statistical Office of the European Union (Eurostat)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"id":"enermaps::selection::tgs00004","dataInfo":[{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}]}],"externalReference":[],"instance":[{"license":{"value":"Creative Commons Attribution 4.0 International","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004"],"distributionlocation":null,"collectedfrom":{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"storagedate":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} From 3e2a2d6e71c4cb9820c858ed114cea9b9ac25021 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:56:55 +0200 Subject: [PATCH 091/287] added new fields in xml --- .../CreateRelatedEntitiesJob_phase1.java | 92 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 24 +++++ 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bd..b2abbc156 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; /** - * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type - * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join - * (R.target = T_i.id) save the tuples (R_i, T_i) + * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity + * T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i) */ public class CreateRelatedEntitiesJob_phase1 { @@ -42,68 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { - String jsonConfiguration = IOUtils + final String jsonConfiguration = IOUtils .toString( PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputRelationsPath = parser.get("inputRelationsPath"); + final String inputRelationsPath = parser.get("inputRelationsPath"); log.info("inputRelationsPath: {}", inputRelationsPath); - String inputEntityPath = parser.get("inputEntityPath"); + final String inputEntityPath = parser.get("inputEntityPath"); log.info("inputEntityPath: {}", inputEntityPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String graphTableClassName = parser.get("graphTableClassName"); + final String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + final Class entityClazz = (Class) Class.forName(graphTableClassName); - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); + }); } private static void joinRelationEntity( - SparkSession spark, - String inputRelationsPath, - String inputEntityPath, - Class clazz, - String outputPath) { + final SparkSession spark, + final String inputRelationsPath, + final String inputEntityPath, + final Class clazz, + final String outputPath) { - Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) + final Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) .map( (MapFunction>) r -> new Tuple2<>(r.getTarget(), r), Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) .cache(); relsByTarget @@ -118,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 { } private static Dataset readPathEntity( - SparkSession spark, String inputEntityPath, Class entityClazz) { + final SparkSession spark, + final String inputEntityPath, + final Class entityClazz) { log.info("Reading Graph table from: {}", inputEntityPath); return spark @@ -129,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.bean(entityClazz)); } - public static RelatedEntity asRelatedEntity(E entity, Class clazz) { + public static RelatedEntity asRelatedEntity(final E entity, final Class clazz) { final RelatedEntity re = new RelatedEntity(); re.setId(entity.getId()); @@ -143,7 +148,7 @@ public class CreateRelatedEntitiesJob_phase1 { case dataset: case otherresearchproduct: case software: - Result result = (Result) entity; + final Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { final StructuredProperty title = result.getTitle().stream().findFirst().get(); @@ -170,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 { break; case datasource: - Datasource d = (Datasource) entity; + final Datasource d = (Datasource) entity; re.setOfficialname(getValue(d.getOfficialname())); re.setWebsiteurl(getValue(d.getWebsiteurl())); re.setDatasourcetype(d.getDatasourcetype()); + re.setDatasourcetypeui(d.getDatasourcetypeui()); re.setOpenairecompatibility(d.getOpenairecompatibility()); break; case organization: - Organization o = (Organization) entity; + final Organization o = (Organization) entity; re.setLegalname(getValue(o.getLegalname())); re.setLegalshortname(getValue(o.getLegalshortname())); @@ -187,14 +193,14 @@ public class CreateRelatedEntitiesJob_phase1 { re.setWebsiteurl(getValue(o.getWebsiteurl())); break; case project: - Project p = (Project) entity; + final Project p = (Project) entity; re.setProjectTitle(getValue(p.getTitle())); re.setCode(getValue(p.getCode())); re.setAcronym(getValue(p.getAcronym())); re.setContracttype(p.getContracttype()); - List> f = p.getFundingtree(); + final List> f = p.getFundingtree(); if (!f.isEmpty()) { re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); } @@ -203,11 +209,11 @@ public class CreateRelatedEntitiesJob_phase1 { return re; } - private static String getValue(Field field) { + private static String getValue(final Field field) { return getFieldValueWithDefault(field, ""); } - private static T getFieldValueWithDefault(Field f, T defaultValue) { + private static T getFieldValueWithDefault(final Field f, final T defaultValue) { return Optional .ofNullable(f) .filter(Objects::nonNull) @@ -216,21 +222,21 @@ public class CreateRelatedEntitiesJob_phase1 { } /** - * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file, * * @param spark * @param relationPath * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( - SparkSession spark, final String relationPath) { + final SparkSession spark, + final String relationPath) { log.info("Reading relations from: {}", relationPath); return spark.read().load(relationPath).as(Encoders.bean(Relation.class)); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 392d3cde6..19300d77d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -735,6 +735,30 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } + if (ds.getJurisdiction() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); + } + + if (ds.getThematic() != null) { + metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); + } + + if (ds.getKnowledgegraph() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString())); + } + + if (ds.getContentpolicies() != null) { + metadata + .addAll( + ds + .getContentpolicies() + .stream() + .filter(Objects::nonNull) + .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) + .collect(Collectors.toList())); + } + break; case organization: final Organization o = (Organization) entity; From df8715a1ecc57b2221a960526700f53d1f6cb676 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 28 Jul 2021 11:58:26 +0200 Subject: [PATCH 092/287] format code after mvn compile --- .../dhp/oa/provision/XmlRecordFactoryTest.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index a5a1563aa..221049f90 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,8 +7,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; -import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -23,8 +21,10 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -137,17 +137,18 @@ public class XmlRecordFactoryTest { @Test public void testEnermapsRecord() throws IOException, DocumentException { - String contextmap = "" + - ""+ - ""+ - ""; + String contextmap = "" + + + "" + + "" + + ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); Dataset d = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); JoinedEntity je = new JoinedEntity<>(d); From 3d8f0f629b3ff2c9d2b7401b7e62381f1547531c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 28 Jul 2021 16:15:15 +0200 Subject: [PATCH 093/287] implemented workflow of creation action set for scholexplorer --- .../datacite/AbstractRestClient.scala | 32 ++++++++--------- dhp-workflows/dhp-graph-provision/pom.xml | 35 +++++++++++++++++++ .../sx/provision/SparkCreateActionset.scala | 31 ++++------------ .../dhp/sx/actionset/oozie_app/workflow.xml | 8 ++--- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 823187afe..92a870e37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -64,26 +64,24 @@ abstract class AbstractRestClient extends Iterator[String]{ .setSocketTimeout(timeout * 1000).build() val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() var tries = 4 - try { - while (tries > 0) { + while (tries > 0) { println(s"requesting ${r.getURI}") - val response = client.execute(r) - println(s"get response with status${response.getStatusLine.getStatusCode}") - if (response.getStatusLine.getStatusCode > 400) { - tries -= 1 + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries-=1 } - else - return IOUtils.toString(response.getEntity.getContent) } "" - } catch { - case e: Throwable => - throw new RuntimeException("Error on executing request ", e) - } finally try client.close() - catch { - case e: IOException => - throw new RuntimeException("Unable to close client ", e) - } - } + } getBufferData() } \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index c279436d7..e402d0600 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -9,6 +9,41 @@ dhp-graph-provision + + + + net.alchim31.maven + scala-maven-plugin + 4.0.1 + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + -Xmax-classfile-name + 200 + + ${scala.version} + + + + + + diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala index 6f0cdcf8a..faf386d25 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala @@ -43,7 +43,7 @@ object SparkCreateActionset { val relation = spark.read.load(s"$sourcePath/relation").as[Relation] relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.save(s"$workingDirFolder/id_relation") + .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] @@ -56,35 +56,18 @@ object SparkCreateActionset { relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") - log.info("saving publication") + log.info("saving entities") - val publication:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/publication").as[Result].map(p => (p.getId, p)) + val entities:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) - publication - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + + entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) + entities + .joinWith(idRelation, entities("_1").equalTo(idRelation("value"))) .map(p => p._1._2) .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - log.info("saving dataset") - val dataset:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/dataset").as[Result].map(p => (p.getId, p)) - dataset - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - log.info("saving software") - val software:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/software").as[Result].map(p => (p.getId, p)) - software - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - - log.info("saving Other Research product") - val orp:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/otherresearchproduct").as[Result].map(p => (p.getId, p)) - orp - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index ef86a1772..7c4b3dd26 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -14,7 +14,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -26,7 +26,7 @@ cluster Create Action Set eu.dnetlib.dhp.sx.provision.SparkCreateActionset - dhp-aggregation-${projectVersion}.jar + dhp-graph-provision-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -42,7 +42,7 @@ --workingDirFolder${workingDirFolder} --masteryarn-cluster - + @@ -53,7 +53,7 @@ cluster Save Action Set eu.dnetlib.dhp.sx.provision.SparkSaveActionSet - dhp-aggregation-${projectVersion}.jar + dhp-graph-provision-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} From 6dddad86ee10543a2d96e3fe7a555bd287492e0c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 16:21:29 +0200 Subject: [PATCH 094/287] [cleaning] title cleaning based on the me.xuender:unidecode library --- dhp-common/pom.xml | 5 ++ .../oaf/utils/GraphCleaningFunctions.java | 19 ++--- .../schema/oaf/utils/OafMapperUtilsTest.java | 25 +++++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 26 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml | 70 +++++++++++++++++++ pom.xml | 5 ++ 6 files changed, 136 insertions(+), 14 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf35..4c7810c47 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -25,6 +25,11 @@ com.github.sisyphsu dateparser + + me.xuender + unidecode + + org.apache.spark spark-core_2.11 diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index e5181b111..1d002ed7e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -7,22 +7,19 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.jetbrains.annotations.NotNull; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import me.xuender.unidecode.Unidecode; public class GraphCleaningFunctions extends CleaningFunctions { @@ -194,11 +191,15 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter( - sp -> sp - .getValue() - .toLowerCase() - .replaceAll(TITLE_FILTER_REGEX, "") - .length() > TITLE_FILTER_RESIDUAL_LENGTH) + sp -> { + final String title = sp + .getValue() + .toLowerCase(); + final String residual = Unidecode + .decode(title) + .replaceAll(TITLE_FILTER_REGEX, ""); + return residual.length() > TITLE_FILTER_RESIDUAL_LENGTH; + }) .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index eefa1e9a3..8d519a93f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -4,12 +4,8 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import java.util.HashSet; import java.util.List; -import java.util.Locale; -import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -19,13 +15,32 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import me.xuender.unidecode.Unidecode; public class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @Test + public void testUnidecode() { + + assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ")); + assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛")); + assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼")); + assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい")); + assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի")); + assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики")); + assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ")); + assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης")); + assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية")); + assertEquals("abc def ghi", Unidecode.decode("abc def ghi")); + } + @Test public void testDateValidation() { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 63f18a803..ba4211a3f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.graph.raw; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; @@ -640,6 +642,30 @@ public class MappersTest { System.out.println(p.getTitle().get(0).getValue()); } + @Test + void testJairo() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + + assertNotNull(p.getTitle()); + assertFalse(p.getTitle().isEmpty()); + assertTrue(p.getTitle().size() == 1); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + + final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + + assertNotNull(p_cleaned.getTitle()); + assertFalse(p_cleaned.getTitle().isEmpty()); + } + @Test void testOdfFromHdfs() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml new file mode 100644 index 000000000..9ec696256 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml @@ -0,0 +1,70 @@ + + +
+ jairo_______::000012e58ed836576ef2a0d38b0f726f + oai:irdb.nii.ac.jp:01221:0000010198 + + + + + + 2021-05-10T11:31:09.424Z + 2021-06-03T01:45:42.536Z + jairo_______ +
+ + 多項式GCDを用いた復号法に関する研究 + 上原, 剛 + 甲斐, 博 + 野田, 松太郎 + application/pdf + http://hdl.handle.net/2433/25934 + jpn + 京都大学数理解析研究所 + 410 + Departmental Bulletin Paper + 0014 + 2004-10-01 + + openaire____::554c7c2873 + OPEN + + + 2433/25934 + AN00061013 + http://hdl.handle.net/2433/25934 + http://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/25934/1/1395-16.pdf + 数理解析研究所講究録 + + + + + https%3A%2F%2Firdb.nii.ac.jp%2Foai + oai:irdb.nii.ac.jp:01221:0000010198 + 2021-04-13T13:36:29Z + + + http://repository.kulib.kyoto-u.ac.jp/dspace-oai/request + oai:repository.kulib.kyoto-u.ac.jp:2433/25934 + 2012-07-12T14:15:41Z + http://irdb.nii.ac.jp/oai + + + + + false + false + 0.9 + + + + +
\ No newline at end of file diff --git a/pom.xml b/pom.xml index 6e4526e41..fc4a8a21b 100644 --- a/pom.xml +++ b/pom.xml @@ -205,6 +205,11 @@ dateparser 1.0.7
+ + me.xuender + unidecode + 0.0.7 + com.google.guava From a9961a183598e77434831b5e24a4c1932a75f6f7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 16:21:29 +0200 Subject: [PATCH 095/287] [cleaning] title cleaning based on the me.xuender:unidecode library --- dhp-common/pom.xml | 5 ++ .../oaf/utils/GraphCleaningFunctions.java | 19 ++--- .../schema/oaf/utils/OafMapperUtilsTest.java | 25 ++++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 79 ++++++++++--------- .../eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml | 70 ++++++++++++++++ pom.xml | 5 ++ 6 files changed, 151 insertions(+), 52 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf35..4c7810c47 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -25,6 +25,11 @@ com.github.sisyphsu dateparser + + me.xuender + unidecode + + org.apache.spark spark-core_2.11 diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index e5181b111..1d002ed7e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -7,22 +7,19 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.jetbrains.annotations.NotNull; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import me.xuender.unidecode.Unidecode; public class GraphCleaningFunctions extends CleaningFunctions { @@ -194,11 +191,15 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter( - sp -> sp - .getValue() - .toLowerCase() - .replaceAll(TITLE_FILTER_REGEX, "") - .length() > TITLE_FILTER_RESIDUAL_LENGTH) + sp -> { + final String title = sp + .getValue() + .toLowerCase(); + final String residual = Unidecode + .decode(title) + .replaceAll(TITLE_FILTER_REGEX, ""); + return residual.length() > TITLE_FILTER_RESIDUAL_LENGTH; + }) .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index eefa1e9a3..8d519a93f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -4,12 +4,8 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import java.util.HashSet; import java.util.List; -import java.util.Locale; -import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -19,13 +15,32 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import me.xuender.unidecode.Unidecode; public class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @Test + public void testUnidecode() { + + assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ")); + assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛")); + assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼")); + assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい")); + assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի")); + assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики")); + assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ")); + assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης")); + assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية")); + assertEquals("abc def ghi", Unidecode.decode("abc def ghi")); + } + @Test public void testDateValidation() { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index aeeceaeb5..ba4211a3f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +18,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +344,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); @@ -557,31 +561,6 @@ public class MappersTest { assertNotNull(d.getInstance().get(0).getUrl()); } - @Test - void testEnermaps() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); - final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); - - System.out.println("***************"); - System.out.println(new ObjectMapper().writeValueAsString(list)); - System.out.println("***************"); - - assertEquals(1, list.size()); - assertTrue(list.get(0) instanceof Dataset); - - final Dataset d = (Dataset) list.get(0); - - assertValidId(d.getId()); - assertValidId(d.getCollectedfrom().get(0).getKey()); - assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); - assertEquals(1, d.getAuthor().size()); - assertEquals(1, d.getInstance().size()); - assertNotNull(d.getInstance().get(0).getUrl()); - assertNotNull(d.getContext()); - assertTrue(StringUtils.isNotBlank(d.getContext().get(0).getId())); - assertEquals("enermaps::selection::tgs00004", d.getContext().get(0).getId()); - } - @Test void testClaimFromCrossref() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); @@ -663,6 +642,30 @@ public class MappersTest { System.out.println(p.getTitle().get(0).getValue()); } + @Test + void testJairo() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + + assertNotNull(p.getTitle()); + assertFalse(p.getTitle().isEmpty()); + assertTrue(p.getTitle().size() == 1); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + + final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + + assertNotNull(p_cleaned.getTitle()); + assertFalse(p_cleaned.getTitle().isEmpty()); + } + @Test void testOdfFromHdfs() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml new file mode 100644 index 000000000..9ec696256 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml @@ -0,0 +1,70 @@ + + +
+ jairo_______::000012e58ed836576ef2a0d38b0f726f + oai:irdb.nii.ac.jp:01221:0000010198 + + + + + + 2021-05-10T11:31:09.424Z + 2021-06-03T01:45:42.536Z + jairo_______ +
+ + 多項式GCDを用いた復号法に関する研究 + 上原, 剛 + 甲斐, 博 + 野田, 松太郎 + application/pdf + http://hdl.handle.net/2433/25934 + jpn + 京都大学数理解析研究所 + 410 + Departmental Bulletin Paper + 0014 + 2004-10-01 + + openaire____::554c7c2873 + OPEN + + + 2433/25934 + AN00061013 + http://hdl.handle.net/2433/25934 + http://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/25934/1/1395-16.pdf + 数理解析研究所講究録 + + + + + https%3A%2F%2Firdb.nii.ac.jp%2Foai + oai:irdb.nii.ac.jp:01221:0000010198 + 2021-04-13T13:36:29Z + + + http://repository.kulib.kyoto-u.ac.jp/dspace-oai/request + oai:repository.kulib.kyoto-u.ac.jp:2433/25934 + 2012-07-12T14:15:41Z + http://irdb.nii.ac.jp/oai + + + + + false + false + 0.9 + + + + +
\ No newline at end of file diff --git a/pom.xml b/pom.xml index 19d00d637..075a45c9f 100644 --- a/pom.xml +++ b/pom.xml @@ -205,6 +205,11 @@ dateparser 1.0.7
+ + me.xuender + unidecode + 0.0.7 + com.google.guava From 4c5a71ba2f968e4a6eaf993eb2590306f92ad8d5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 17:11:18 +0200 Subject: [PATCH 096/287] [broker] updated relation descriptors, making use of constant values --- .../EnrichMissingDatasetIsReferencedBy.java | 3 ++- .../EnrichMissingDatasetIsRelatedTo.java | 3 ++- .../EnrichMissingDatasetIsSupplementedBy.java | 3 ++- .../EnrichMissingDatasetIsSupplementedTo.java | 3 ++- .../EnrichMissingDatasetReferences.java | 3 ++- .../EnrichMissingPublicationIsReferencedBy.java | 3 ++- .../EnrichMissingPublicationIsRelatedTo.java | 3 ++- .../EnrichMissingPublicationIsSupplementedBy.java | 3 ++- .../EnrichMissingPublicationIsSupplementedTo.java | 3 ++- .../EnrichMissingPublicationReferences.java | 3 ++- .../eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java | 13 +++++++------ 11 files changed, 27 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java index 21786687e..bcbcf755f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDat @Override protected boolean filterByType(final String relType) { - return relType.equals("isReferencedBy"); + return relType.equals(ModelConstants.IS_REFERENCED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java index 0f3739434..4125974ce 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDatase @Override protected boolean filterByType(final String relType) { - return relType.equals("isRelatedTo"); + return relType.equals(ModelConstants.IS_RELATED_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java index cde227fee..480daf666 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingD @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedBy"); + return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java index 750165ff5..97b1eb8bd 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingD @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedTo"); + return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java index b1c0afe16..0978486a3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset @Override protected boolean filterByType(final String relType) { - return relType.equals("references"); + return relType.equals(ModelConstants.REFERENCES); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java index eebb5c1a6..ff9155c9d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { @@ -11,6 +12,6 @@ public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissin @Override protected boolean filterByType(final String relType) { - return relType.equals("isReferencedBy"); + return relType.equals(ModelConstants.IS_REFERENCED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java index a8aa550d4..1051559c9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPu @Override protected boolean filterByType(final String relType) { - return relType.equals("isRelatedTo"); + return relType.equals(ModelConstants.IS_RELATED_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java index 762ac942e..d97f46f09 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { @@ -11,6 +12,6 @@ public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMiss @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedBy"); + return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java index fc7196a01..b33b340e3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMiss @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedTo"); + return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java index da1994454..fe0f96b6e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPub @Override protected boolean filterByType(final String relType) { - return relType.equals("references"); + return relType.equals(ModelConstants.REFERENCES); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index c7be633a9..f578548fb 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -5,6 +5,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -52,15 +53,15 @@ public class ClusterUtils { } public static boolean isDedupRoot(final String id) { - return id.contains("dedup_wf_"); + return id.contains("dedup"); } public static final boolean isValidResultResultClass(final String s) { - return s.equals("isReferencedBy") - || s.equals("isRelatedTo") - || s.equals("references") - || s.equals("isSupplementedBy") - || s.equals("isSupplementedTo"); + return s.equals(ModelConstants.IS_REFERENCED_BY) + || s.equals(ModelConstants.IS_RELATED_TO) + || s.equals(ModelConstants.REFERENCES) + || s.equals(ModelConstants.IS_SUPPLEMENTED_BY) + || s.equals(ModelConstants.IS_SUPPLEMENT_TO); } public static T incrementAccumulator(final T o, final LongAccumulator acc) { From 3721df7aa6d54bf20c1c709a1dcd3be3d8dc3af4 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 29 Jul 2021 10:45:24 +0200 Subject: [PATCH 097/287] refactoring create actionset of scholexplorer, moved on package dhp-aggregation --- .../scholix}/SparkCreateActionset.scala | 22 +++++++++---------- .../scholix}/SparkSaveActionSet.scala | 14 ++++++------ .../dhp/sx/actionset/generate_actionset.json | 0 .../sx/actionset/oozie_app/config-default.xml | 0 .../dhp/sx/actionset/oozie_app/workflow.xml | 4 ++-- .../dhp/sx/actionset/save_actionset.json | 0 6 files changed, 20 insertions(+), 20 deletions(-) rename dhp-workflows/{dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix}/SparkCreateActionset.scala (63%) rename dhp-workflows/{dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix}/SparkSaveActionSet.scala (86%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json (100%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml (95%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json (100%) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 63% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala index faf386d25..b78f411ee 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.sx.provision +package eu.dnetlib.dhp.actionmanager.scholix import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import org.apache.spark.{SparkConf, sql} -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.SparkConf +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -34,16 +34,16 @@ object SparkCreateActionset { val workingDirFolder = parser.get("workingDirFolder") log.info(s"workingDirFolder -> $workingDirFolder") - implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resultEncoders:Encoder[Result] = Encoders.kryo[Result] - implicit val relationEncoders:Encoder[Relation] = Encoders.kryo[Relation] + implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result] + implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation] - import spark.implicits._ + import spark.implicits._ val relation = spark.read.load(s"$sourcePath/relation").as[Relation] - relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") + relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .flatMap(r => List(r.getSource, r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] @@ -53,12 +53,12 @@ object SparkCreateActionset { log.info("save relation filtered") - relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") log.info("saving entities") - val entities:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) + val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 86% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index d1d0b8424..1df7ea3fb 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.sx.provision +package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat @@ -73,13 +73,13 @@ object SparkSaveActionSet { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING) + implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) spark.read.load(sourcePath).as[Oaf] - .map(o =>toActionSet(o)) - .filter(o => o!= null) - .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) + .map(o => toActionSet(o)) + .filter(o => o != null) + .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml similarity index 95% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index 7c4b3dd26..8c045fcfe 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -26,7 +26,7 @@ cluster Create Action Set eu.dnetlib.dhp.sx.provision.SparkCreateActionset - dhp-graph-provision-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -53,7 +53,7 @@ cluster Save Action Set eu.dnetlib.dhp.sx.provision.SparkSaveActionSet - dhp-graph-provision-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json From 908f57a4758b2aa2ff1c93cad41fcf2328c73bbe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 10:49:39 +0200 Subject: [PATCH 098/287] code formatting --- .../main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index f578548fb..7c4ca1d22 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -5,7 +5,6 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -18,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class ClusterUtils { From 1923c1ce21658c05e9822c9363a3509e4b63eb32 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 11:36:20 +0200 Subject: [PATCH 099/287] replaced full join + filtering with a left join --- .../dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index 151f5ba3f..c2bcf69f0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -80,8 +80,7 @@ public class PatchRelationsApplication { final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); rels - .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "full") - .filter((FilterFunction>) t -> Objects.nonNull(t._1())) + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") .map((MapFunction, Relation>) t -> { final Relation r = t._1(); Optional.ofNullable(t._2()) @@ -89,8 +88,7 @@ public class PatchRelationsApplication { .ifPresent(r::setSource); return r; }, Encoders.bean(Relation.class)) - .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "full") - .filter((FilterFunction>) t -> Objects.nonNull(t._1())) + .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") .map((MapFunction, Relation>) t -> { final Relation r = t._1(); Optional.ofNullable(t._2()) From e87e1805c4280e9d8ed9be9f733d331401273118 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 12:13:06 +0200 Subject: [PATCH 100/287] [raw_all] added extra workflow step for patching the identifiers in the relations, given an id mapping dataset --- .../graph/raw/PatchRelationsApplication.java | 115 ++++++++++++++++++ .../graph/raw/common/RelationIdMapping.java | 24 ++++ .../oa/graph/patch_relations_parameters.json | 26 ++++ .../oa/graph/raw_all/oozie_app/workflow.xml | 47 ++++++- 4 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java new file mode 100644 index 000000000..c2bcf69f0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.oa.graph.raw; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class PatchRelationsApplication { + + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional.ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new) + )); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); + + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); + + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); + + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } + + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + + final String relationPath = graphBasePath + "/relation"; + + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + + rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)) + .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark.read().textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java new file mode 100644 index 000000000..f251da8c3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.oa.graph.raw.common; + +public class RelationIdMapping { + + private String oldId; + + private String newId; + + public String getOldId() { + return oldId; + } + + public void setOldId(final String oldId) { + this.oldId = oldId; + } + + public String getNewId() { + return newId; + } + + public void setNewId(final String newId) { + this.newId = newId; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json new file mode 100644 index 000000000..178c2d69b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "g", + "paramLongName": "graphBasePath", + "paramDescription": "base graph path providing the set of relations to patch", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingDir", + "paramDescription": "intermediate storage location", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "idMappingPath", + "paramDescription": "dataset providing the old -> new identifier mapping", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 7f1ecb39f..e7320de3b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -100,6 +100,16 @@ a blacklist of nsprefixes (comma separeted) + + shouldPatchRelations + false + activates the relation patching phase, driven by the content in ${idMappingPath} + + + idMappingPath + + path pointing to the relations identifiers mapping dataset + sparkDriverMemory memory for driver process @@ -538,7 +548,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${workingDir}/graph_raw + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + From 5d08ad86ae45478db3742fef51c7c0ae38f30e34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:03:16 +0200 Subject: [PATCH 101/287] [raw_all] patching relation identifier phase to be run at the end, i.e. includes also claimed relations --- .../oa/graph/raw_all/oozie_app/workflow.xml | 75 +++++++++---------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index e7320de3b..321ca4090 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -548,42 +548,7 @@ - - - - - - ${(shouldPatchRelations eq "true") and - (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} - - - - - - - - yarn - cluster - PatchRelations - eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores ${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --graphBasePath${workingDir}/graph_raw - --workingDir${workingDir}/patch_relations - --idMappingPath${idMappingPath} - - - - + @@ -596,7 +561,6 @@ - yarn @@ -805,7 +769,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${graphOutputPath} + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + \ No newline at end of file From e725c88ebb6eedea235abfc07af932756c9e2397 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:03:16 +0200 Subject: [PATCH 102/287] [raw_all] patching relation identifier phase to be run at the end, i.e. includes also claimed relations --- .../oa/graph/raw_all/oozie_app/workflow.xml | 75 +++++++++---------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index e7320de3b..321ca4090 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -548,42 +548,7 @@ - - - - - - ${(shouldPatchRelations eq "true") and - (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} - - - - - - - - yarn - cluster - PatchRelations - eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores ${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --graphBasePath${workingDir}/graph_raw - --workingDir${workingDir}/patch_relations - --idMappingPath${idMappingPath} - - - - + @@ -596,7 +561,6 @@ - yarn @@ -805,7 +769,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${graphOutputPath} + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + \ No newline at end of file From baad01cadcc499909d68651ff03a60213b7b8c76 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 29 Jul 2021 13:04:39 +0200 Subject: [PATCH 103/287] hostedbymap --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../doiboost/DoiBoostMappingUtil.scala | 2 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ------ .../dhp/oa/graph/hostebymap/Constants.java | 15 -- .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ------------ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ------ .../hostebymap/model/UnibiGoldModel.java | 44 ----- .../oa/graph/hostedbymap/Aggregators.scala | 97 ++++++++++ .../dhp/oa/graph/hostedbymap/Constants.java | 13 ++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 107 +++++++++++ .../SparkProduceHostedByMap.scala} | 107 ++++++----- .../oa/graph/hostedbymap/model/DOAJModel.java | 52 ++++++ .../hostedbymap/model/UnibiGoldModel.java | 45 +++++ .../hostedbymap/download_csv_parameters.json | 37 ++++ .../oa/graph/hostedbymap/hostedby_params.json | 38 ++++ .../hostedbymap/oozie_app/config-default.xml | 30 ++++ .../graph/hostedbymap/oozie_app/workflow.xml | 148 +++++++++++++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 127 +++++++++---- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 168 +++++++++--------- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 +-- .../dhp/oa/graph/hostedbymap/datasource.json | 2 +- .../graph/hostedbymap/datasourceHostedByItem | 9 + .../dhp/oa/graph/hostedbymap/doajHostedByItem | 25 +++ .../oa/graph/hostedbymap/unibyHostedByItem | 29 +++ 24 files changed, 885 insertions(+), 459 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java rename dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/{hostebymap/SparkPrepareHostedByMapData.scala => hostedbymap/SparkProduceHostedByMap.scala} (76%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f1..502cb370f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -245,6 +245,8 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) + //TODO replace with the one above as soon as the new HBM will be used + //hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala deleted file mode 100644 index 561d2bbf4..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} -import org.apache.spark.sql.expressions.Aggregator - - -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} -case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} - -object Aggregators { - - - - def getId(s1:String, s2:String) : String = { - if (!s1.equals("")){ - return s1} - s2 - } - - - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java deleted file mode 100644 index b07e33cd1..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java +++ /dev/null @@ -1,15 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -public class Constants { - - - - public static final String OPENAIRE = "openaire"; - public static final String DOAJ = "doaj"; - public static final String UNIBI = "unibi"; - - - public static final String ISSN = "issn"; - public static final String EISSN = "eissn"; - public static final String ISSNL = "issnl"; -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java deleted file mode 100644 index d397886a3..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Optional; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - - URLConnection connection = new URL(fileURL).openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - - if(shouldReplace){ - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - - Class clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - .withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - - - writer.close(); - in.close(); - if(shouldReplace){ - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - - } - - - - -} - diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java deleted file mode 100644 index fe1d14a76..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java +++ /dev/null @@ -1,53 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn ; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java deleted file mode 100644 index 309f74eea..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java +++ /dev/null @@ -1,44 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import com.opencsv.bean.CsvBindByName; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala new file mode 100644 index 000000000..6a9346ed5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -0,0 +1,97 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (s1.startsWith("10|")){ + return s1} + s2 + } + + def getValue(s1:String, s2:String) : String = { + if(!s1.equals("")){ + return s1 + } + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { + val transformedData : Dataset[(String, HostedByItemType)] = df + .groupByKey(_._1)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator1) + .map{ + case (id:String , res:(String, HostedByItemType)) => res + }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + transformedData + } + + val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) + override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { + return merge(b, a) + } + override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(b1._2.id.startsWith("10|")){ + return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction + override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + + override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java new file mode 100644 index 000000000..b29877a48 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +public class Constants { + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java new file mode 100644 index 000000000..9516cf6f7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java @@ -0,0 +1,107 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("workingPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + final Boolean shouldReplace = Optional + .ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + URLConnection connection = new URL(fileURL).openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if (shouldReplace) { + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + writer.close(); + in.close(); + if (shouldReplace) { + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index b66e97281..c44f2cbed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,17 +1,23 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap +package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.schema.oaf.{Datasource} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import java.io.PrintWriter -object SparkPrepareHostedByMapData { +import org.apache.hadoop.io.compress.GzipCodec + + +object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) @@ -37,24 +43,32 @@ object SparkPrepareHostedByMapData { } } -// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { -// implicit val formats = DefaultFormats -// val serializedJSON:String = write(input) -// -// var hostedBy = new ListBuffer[String]() -// if(!input.issn.equals("")){ -// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" -// } -// if(!input.eissn.equals("")){ -// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" -// } -// if(!input.lissn.equals("")){ -// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" -// } -// -// hostedBy -// -// } + def toHostedByMap(input: (String, HostedByItemType)): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + + Serialization.write(map) + + + } + + /** + * + def toHostedByMap(input: Map[String, HostedByItemType]): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + + + Serialization.write(input) + + + } + */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ @@ -166,11 +180,31 @@ object SparkPrepareHostedByMapData { } + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + val conf = new Configuration() + + conf.set("fs.defaultFS", hdfsNameNode) + val fs= FileSystem.get(conf) + val output = fs.create(new Path(outputPath)) + val writer = new PrintWriter(output) + try { + input.foreach(hbi => writer.println(hbi)) + } + finally { + writer.close() + + } + + } + + + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -179,11 +213,10 @@ object SparkPrepareHostedByMapData { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - import spark.implicits._ val datasourcePath = parser.get("datasourcePath") val workingDirPath = parser.get("workingPath") - + val outputPath = parser.get("outputPath") implicit val formats = DefaultFormats @@ -191,29 +224,15 @@ object SparkPrepareHostedByMapData { logger.info("Getting the Datasources") - // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel] - val dats : Dataset[HostedByItemType] = - oaHostedByDataset(spark, datasourcePath) + Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) - dats.flatMap(hbi => toList(hbi)) - .groupByKey(_._1) + .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) + .map(hbi => toHostedByMap(hbi))(Encoders.STRING) + .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) -// -// - -// - -// -// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") -// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) -// .filter(i => i != null)) -// .flatMap(toHostedByMap) -// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") -// -// } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java new file mode 100644 index 000000000..ba804b939 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java new file mode 100644 index 000000000..0927a136b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json new file mode 100644 index 000000000..fba048343 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"fu", + "paramLongName":"fileURL", + "paramDescription": "the url to download the csv file ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "cfn", + "paramLongName": "classForName", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "sr", + "paramLongName": "replace", + "paramDescription": "true if the input file has to be cleaned before parsing", + "paramRequired": false + } +] + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json new file mode 100644 index 000000000..9173b78ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json @@ -0,0 +1,38 @@ + +[ + + { + "paramName":"dsp", + "paramLongName":"datasourcePath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml new file mode 100644 index 000000000..ecf6c3b31 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -0,0 +1,148 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --workingPath${workingDir}/unibi_gold + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${doajFileURL} + --workingPath${workingDir}/doaj + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --replacetrue + + + + + + + + + + + + yarn-cluster + Produce the new HostedByMap + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --datasourcePath${sourcePath}/datasource + --workingPath${workingDir} + --outputPath${outputPath} + --masteryarn-cluster + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 8e5657bfc..2ed76a72a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,19 +1,14 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import java.sql.Timestamp - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData} +import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.mutable.ListBuffer -import scala.io.Source +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write class TestPreprocess extends java.io.Serializable{ @@ -21,19 +16,14 @@ class TestPreprocess extends java.io.Serializable{ implicit val schema = Encoders.product[HostedByInfo] + def toHBIString (hbi:HostedByItemType): String = { + implicit val formats = DefaultFormats + + write(hbi) + } @Test def readDatasource():Unit = { - - - import org.apache.spark.sql.Encoders - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -45,25 +35,29 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("datasource.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count) + assertEquals(9, ds.count) + assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readGold():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -76,23 +70,27 @@ class TestPreprocess extends java.io.Serializable{ val path = getClass.getResource("unibi_transformed.json").getPath - println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count) + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + assertEquals(29, ds.count) + assertEquals(29, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readDoaj():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -104,14 +102,69 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("doaj_transformed.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count) + assertEquals(25, ds.count) + assertEquals(14, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.DOAJ))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } + @Test + def testAggregator() : Unit = { + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + + + val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + assertEquals(106, tmp.count) + assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + + + val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) + + assertEquals(82, ds.count) + + assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) + + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.id.startsWith("10|")) + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) + assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) + + val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + + hbmap.foreach(entry => println(entry)) + spark.close() + + } + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 01c70502c..f886b275b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -1,111 +1,109 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.util.List; + +import org.junit.jupiter.api.Test; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.junit.jupiter.api.Test; -import java.io.*; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; -public class TestReadCSV { +public class TestReadCSV { - @Test - public void testCSVUnibi() throws FileNotFoundException { + @Test + public void testCSVUnibi() throws FileNotFoundException { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); + ObjectMapper mapper = new ObjectMapper(); - ObjectMapper mapper = new ObjectMapper(); + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + } + @Test + public void testCSVUrlUnibi() throws IOException { - } + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - @Test - public void testCSVUrlUnibi() throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> + ); + } - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } + @Test + public void testCSVUrlDOAJ() throws IOException { + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); - ); - } + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); - @Test - public void testCSVUrlDOAJ() throws IOException { + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - - ); - } + ); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c..63f18a803 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 818aaa716..4467c702f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,6 +1,6 @@ {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem new file mode 100644 index 000000000..093c57a9c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2640-4613","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem new file mode 100644 index 000000000..effd0dd60 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem @@ -0,0 +1,25 @@ +{"id":"doaj","officialname":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","lissn":"","openAccess":true} +{"id":"doaj","officialname":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Membranes","issn":"2077-0375","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Agriculture","issn":"","eissn":"2077-0472","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Religions","issn":"","eissn":"2077-1444","lissn":"","openAccess":true} +{"id":"doaj","officialname":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","lissn":"","openAccess":true} +{"id":"doaj","officialname":"UCV-Scientia","issn":"2077-172X","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Granì","issn":"2077-1800","eissn":"2413-8738","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Science Education International","issn":"","eissn":"2077-2327","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Edumecentro","issn":"","eissn":"2077-2874","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Monteverdia","issn":"","eissn":"2077-2890","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Transformación","issn":"","eissn":"2077-2955","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue de Primatologie","issn":"","eissn":"2077-3757","lissn":"","openAccess":true} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem new file mode 100644 index 000000000..403ffdf5d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem @@ -0,0 +1,29 @@ +{"id":"unibi","officialname":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","issn":"2502-731X","eissn":"","lissn":"2502-731X","openAccess":true} +{"id":"unibi","officialname":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","issn":"2502-7409","eissn":"","lissn":"1411-0253","openAccess":true} +{"id":"unibi","officialname":"At-Tadbir : jurnal ilmiah manajemen","issn":"2502-7433","eissn":"","lissn":"2502-7433","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kesehatan Panrita Husada.","issn":"2502-745X","eissn":"","lissn":"2502-745X","openAccess":true} +{"id":"unibi","officialname":"ELang journal (An English Education journal)","issn":"2502-7549","eissn":"","lissn":"2502-7549","openAccess":true} +{"id":"unibi","officialname":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","issn":"2423-3633","eissn":"","lissn":"2423-3625","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ̒ilm/sanjī.","issn":"2423-5563","eissn":"","lissn":"2423-3773","openAccess":true} +{"id":"unibi","officialname":"Iranian journal of animal biosystematics.","issn":"1735-434X","eissn":"","lissn":"1735-434X","openAccess":true} +{"id":"unibi","officialname":"Majallah-i jangal-i Īrān.","issn":"2423-4435","eissn":"","lissn":"2008-6113","openAccess":true} +{"id":"unibi","officialname":"Ābziyān-i zinatī.","issn":"2423-4575","eissn":"","lissn":"2423-4575","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","issn":"2423-4974","eissn":"","lissn":"2423-4974","openAccess":true} +{"id":"unibi","officialname":"AIHM journal club.","issn":"2380-0607","eissn":"","lissn":"2380-0607","openAccess":true} +{"id":"unibi","officialname":"Frontiers.","issn":"1085-4568","eissn":"","lissn":"1085-4568","openAccess":true} +{"id":"unibi","officialname":"˜The œjournal of contemporary archival studies.","issn":"2380-8845","eissn":"","lissn":"2380-8845","openAccess":true} +{"id":"unibi","officialname":"International journal of complementary & alternative medicine.","issn":"2381-1803","eissn":"","lissn":"2381-1803","openAccess":true} +{"id":"unibi","officialname":"Palapala.","issn":"2381-2478","eissn":"","lissn":"2381-2478","openAccess":true} +{"id":"unibi","officialname":"Asia pacific journal of environment ecology and sustainable development.","issn":"2382-5170","eissn":"","lissn":"2382-5170","openAccess":true} +{"id":"unibi","officialname":"Majallah-i salāmat va bihdāsht","issn":"2382-9737","eissn":"","lissn":"2382-9737","openAccess":true} +{"id":"unibi","officialname":"UCT journal of research in science ,engineering and technology","issn":"2382-977X","eissn":"","lissn":"2382-977X","openAccess":true} +{"id":"unibi","officialname":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","issn":"2382-9974","eissn":"","lissn":"2382-9974","openAccess":true} +{"id":"unibi","officialname":"Problemi endokrinnoï patologìï.","issn":"2227-4782","eissn":"","lissn":"2227-4782","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","issn":"2685-0079","eissn":"","lissn":"2597-4971","openAccess":true} +{"id":"unibi","officialname":"Hypermedia magazine.","issn":"2574-0075","eissn":"","lissn":"2574-0075","openAccess":true} +{"id":"unibi","officialname":"˜The œmuseum review.","issn":"2574-0296","eissn":"","lissn":"2574-0296","openAccess":true} +{"id":"unibi","officialname":"Bioactive compounds in health and disease.","issn":"2574-0334","eissn":"","lissn":"2574-0334","openAccess":true} +{"id":"unibi","officialname":"Journal of computer science integration.","issn":"2574-108X","eissn":"","lissn":"2574-108X","openAccess":true} +{"id":"unibi","officialname":"Child and adolescent obesity.","issn":"2574-254X","eissn":"","lissn":"2574-254X","openAccess":true} +{"id":"unibi","officialname":"Journal of research on the college president.","issn":"2574-3325","eissn":"","lissn":"2574-3325","openAccess":true} +{"id":"unibi","officialname":"European journal of sustainable development.","issn":"2239-6101","eissn":"","lissn":"2239-5938","openAccess":true} \ No newline at end of file From b1b0cc3f157df3998d2fe1392e6b5b7e76f75c12 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 29 Jul 2021 13:54:56 +0200 Subject: [PATCH 104/287] fixed wrong package name --- .../eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index 8c045fcfe..2d97b5163 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -25,7 +25,7 @@ yarn-cluster cluster Create Action Set - eu.dnetlib.dhp.sx.provision.SparkCreateActionset + eu.dnetlib.dhp.actionmanager.scholix.SparkCreateActionset dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -52,7 +52,7 @@ yarn-cluster cluster Save Action Set - eu.dnetlib.dhp.sx.provision.SparkSaveActionSet + eu.dnetlib.dhp.actionmanager.scholix.SparkSaveActionSet dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} From 6e3554a45e4ac1598b3a6177c9d3a85f687b7325 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:56:37 +0200 Subject: [PATCH 105/287] [provision] lowercase relation filter --- .../java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index 7d53d3554..b3f785492 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -10,6 +10,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -81,6 +82,7 @@ public class PrepareRelationsJob { Set relationFilter = Optional .ofNullable(parser.get("relationFilter")) + .map(String::toLowerCase) .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) .orElse(new HashSet<>()); log.info("relationFilter: {}", relationFilter); @@ -130,7 +132,7 @@ public class PrepareRelationsJob { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(rel.getRelClass()) == false); + .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); JavaRDD pruned = pruneRels( pruneRels( From c53d106e80eca2af6456925130dd856bf5d33c49 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:56:37 +0200 Subject: [PATCH 106/287] [provision] lowercase relation filter --- .../java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index 7d53d3554..b3f785492 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -10,6 +10,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -81,6 +82,7 @@ public class PrepareRelationsJob { Set relationFilter = Optional .ofNullable(parser.get("relationFilter")) + .map(String::toLowerCase) .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) .orElse(new HashSet<>()); log.info("relationFilter: {}", relationFilter); @@ -130,7 +132,7 @@ public class PrepareRelationsJob { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(rel.getRelClass()) == false); + .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); JavaRDD pruned = pruneRels( pruneRels( From 6358f92c3a5301bd25ec00259453490d19e318aa Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 30 Jul 2021 08:54:25 +0200 Subject: [PATCH 107/287] added sleep to solve problem of lost request of creating index --- .../java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index f96a64a27..ffeb0995d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -71,6 +71,9 @@ public class DropAndCreateESIndex { log.info(STATUS_CODE_TEXT, response.getStatusLine()); } + log.info("Sleeping 60 seconds to avoid to lost the creation of index request"); + Thread.sleep(60000); + try (CloseableHttpClient client = HttpClients.createDefault()) { final String summaryConf = IOUtils From 576693d782d2ff51908168a519f19a4f70097136 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 10:13:33 +0200 Subject: [PATCH 108/287] added unit test for PatchRelationsApplication --- .../graph/raw/PatchRelationsApplication.java | 3 + .../raw/PatchRelationApplicationTest.java | 89 +++++++++++++++++++ .../dnetlib/dhp/oa/graph/raw/id_mapping.json | 5 ++ .../dhp/oa/graph/raw/relations_to_patch.json | 6 ++ 4 files changed, 103 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index c2bcf69f0..dddc53bc8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -79,6 +79,9 @@ public class PatchRelationsApplication { final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + log.info("relations: {}", rels.count()); + log.info("idMapping: {}", idMapping.count()); + rels .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") .map((MapFunction, Relation>) t -> { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java new file mode 100644 index 000000000..def53c085 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -0,0 +1,89 @@ +package eu.dnetlib.dhp.oa.graph.raw; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Optional; + +public class PatchRelationApplicationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final String ID_MAPPING_PATH = "map/id_mapping.json"; + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(PatchRelationApplicationTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PatchRelationApplicationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PatchRelationApplicationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PatchRelationApplicationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + FileUtils.copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("id_mapping.json"), + workingDir.resolve(ID_MAPPING_PATH).toFile() + ); + + FileUtils.copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("relations_to_patch.json"), + workingDir.resolve("graphBasePath/relation/rels.json").toFile() + ); + + } + + @BeforeEach + public void setUp() throws IOException { + + + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testPatchRelationApplication() throws Exception { + + PatchRelationsApplication.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphBasePath", workingDir.toString() + "/graphBasePath", + "-workingDir", workingDir.toString() + "/workingDir", + "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + }); + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json new file mode 100644 index 000000000..640d042b1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json @@ -0,0 +1,5 @@ +{"oldId": "1a", "newId": "1b"} +{"oldId": "2a", "newId": "2b"} +{"oldId": "3a", "newId": "3b"} +{"oldId": "4a", "newId": "4b"} +{"oldId": "5a", "newId": "5b"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json new file mode 100644 index 000000000..3e1203b18 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json @@ -0,0 +1,6 @@ +{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} From 9bc4fd3b69981716c6a6b96b1234871539bf2f79 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 10:34:05 +0200 Subject: [PATCH 109/287] Patch FCT relations - fixed issue with join --- .../dhp/oa/graph/raw/PatchRelationsApplication.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index dddc53bc8..5bbc8a975 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -82,7 +82,7 @@ public class PatchRelationsApplication { log.info("relations: {}", rels.count()); log.info("idMapping: {}", idMapping.count()); - rels + Dataset fj = rels .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") .map((MapFunction, Relation>) t -> { final Relation r = t._1(); @@ -90,8 +90,9 @@ public class PatchRelationsApplication { .map(RelationIdMapping::getNewId) .ifPresent(r::setSource); return r; - }, Encoders.bean(Relation.class)) - .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") + }, Encoders.bean(Relation.class)); + + fj.joinWith(idMapping, fj.col("target").equalTo(idMapping.col("oldId")), "left") .map((MapFunction, Relation>) t -> { final Relation r = t._1(); Optional.ofNullable(t._2()) From a6a38cca9ec383e992cbafe82b30159f573e3f2b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:06:11 +0200 Subject: [PATCH 110/287] fixed implementation of PatchRelationsApplication, refined the relative unit test --- .../graph/raw/PatchRelationsApplication.java | 181 +++++++++--------- .../graph/raw/common/RelationIdMapping.java | 29 +-- .../raw/PatchRelationApplicationTest.java | 148 ++++++++------ .../dhp/oa/graph/raw/relations_to_patch.json | 2 +- 4 files changed, 198 insertions(+), 162 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index dddc53bc8..5523863ff 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -1,10 +1,12 @@ + package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; -import eu.dnetlib.dhp.schema.oaf.Relation; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -15,104 +17,111 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; -import java.io.FileNotFoundException; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class PatchRelationsApplication { - private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Optional.ofNullable( - PatchRelationsApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) - .orElseThrow(FileNotFoundException::new) - )); - parser.parseArgument(args); + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional + .ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new))); + parser.parseArgument(args); - final Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String graphBasePath = parser.get("graphBasePath"); - log.info("graphBasePath: {}", graphBasePath); + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); - final String workingDir = parser.get("workingDir"); - log.info("workingDir: {}", workingDir); + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); - final String idMappingPath = parser.get("idMappingPath"); - log.info("idMappingPath: {}", idMappingPath); + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); - final SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); - } + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } - /** - * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the - * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. - * - * @param spark the SparkSession - * @param graphBasePath base graph path providing the set of relations to patch - * @param workingDir intermediate storage location - * @param idMappingPath dataset providing the old -> new identifier mapping - */ - private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, + final String idMappingPath) { - final String relationPath = graphBasePath + "/relation"; + final String relationPath = graphBasePath + "/relation"; - final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); - final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); - log.info("relations: {}", rels.count()); - log.info("idMapping: {}", idMapping.count()); + log.info("relations: {}", rels.count()); + log.info("idMapping: {}", idMapping.count()); - rels - .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setSource); - return r; - }, Encoders.bean(Relation.class)) - .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setTarget); - return r; - }, Encoders.bean(Relation.class)) - .map( - (MapFunction) OBJECT_MAPPER::writeValueAsString, - Encoders.STRING()) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(workingDir); + final Dataset bySource = rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)); - spark.read().textFile(workingDir) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(relationPath); - } + bySource + .joinWith(idMapping, bySource.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + spark + .read() + .textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java index f251da8c3..d5852ab70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -1,24 +1,25 @@ + package eu.dnetlib.dhp.oa.graph.raw.common; public class RelationIdMapping { - private String oldId; + private String oldId; - private String newId; + private String newId; - public String getOldId() { - return oldId; - } + public String getOldId() { + return oldId; + } - public void setOldId(final String oldId) { - this.oldId = oldId; - } + public void setOldId(final String oldId) { + this.oldId = oldId; + } - public String getNewId() { - return newId; - } + public String getNewId() { + return newId; + } - public void setNewId(final String newId) { - this.newId = newId; - } + public void setNewId(final String newId) { + this.newId = newId; + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index def53c085..3fd365416 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -1,89 +1,115 @@ + package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Optional; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Relation; public class PatchRelationApplicationTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static final String ID_MAPPING_PATH = "map/id_mapping.json"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final String ID_MAPPING_PATH = "map/id_mapping.json"; - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static final Logger log = LoggerFactory.getLogger(PatchRelationApplicationTest.class); + private static final Logger log = LoggerFactory.getLogger(PatchRelationApplicationTest.class); - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(PatchRelationApplicationTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PatchRelationApplicationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); - conf.setAppName(PatchRelationApplicationTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(PatchRelationApplicationTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(PatchRelationApplicationTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); + spark = SparkSession + .builder() + .appName(PatchRelationApplicationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); - FileUtils.copyInputStreamToFile( - PatchRelationApplicationTest.class.getResourceAsStream("id_mapping.json"), - workingDir.resolve(ID_MAPPING_PATH).toFile() - ); + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("id_mapping.json"), + workingDir.resolve(ID_MAPPING_PATH).toFile()); - FileUtils.copyInputStreamToFile( - PatchRelationApplicationTest.class.getResourceAsStream("relations_to_patch.json"), - workingDir.resolve("graphBasePath/relation/rels.json").toFile() - ); + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("relations_to_patch.json"), + workingDir.resolve("graphBasePath/relation/rels.json").toFile()); - } + } - @BeforeEach - public void setUp() throws IOException { + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + @Test + public void testPatchRelationApplication() throws Exception { - } + final String graphBasePath = workingDir.toString() + "/graphBasePath"; + PatchRelationsApplication.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphBasePath", graphBasePath, + "-workingDir", workingDir.toString() + "/workingDir", + "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + }); - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + final List rels = spark + .read() + .textFile(graphBasePath + "/relation") + .map( + (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), + Encoders.bean(Relation.class)) + .collectAsList(); - @Test - public void testPatchRelationApplication() throws Exception { + assertEquals(6, rels.size()); - PatchRelationsApplication.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-graphBasePath", workingDir.toString() + "/graphBasePath", - "-workingDir", workingDir.toString() + "/workingDir", - "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH - }); + assertEquals(0, getCount(rels, "1a"), "should be patched to 1b"); + assertEquals(0, getCount(rels, "2a"), "should be patched to 2b"); - } + assertEquals(2, getCount(rels, "10a"), "not included in patching"); + assertEquals(2, getCount(rels, "20a"), "not included in patching"); + + assertEquals(2, getCount(rels, "15a"), "not included in patching"); + assertEquals(2, getCount(rels, "25a"), "not included in patching"); + + assertEquals(2, getCount(rels, "1b"), "patched from 1a"); + assertEquals(2, getCount(rels, "2b"), "patched from 2a"); + } + + private long getCount(List rels, final String id) { + return rels.stream().filter(r -> r.getSource().equals(id) || r.getTarget().equals(id)).count(); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json index 3e1203b18..31755c53d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json @@ -3,4 +3,4 @@ {"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} {"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} {"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} -{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From 19620eed46d8f94474ebf27b21444f4c901080f7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:09:32 +0200 Subject: [PATCH 111/287] applying PR#131, Patch the identifiers (source/target) in the relations, refinements --- .../graph/raw/PatchRelationsApplication.java | 180 ++++++++++-------- .../graph/raw/common/RelationIdMapping.java | 29 +-- .../raw/PatchRelationApplicationTest.java | 115 +++++++++++ .../dnetlib/dhp/oa/graph/raw/id_mapping.json | 5 + .../dhp/oa/graph/raw/relations_to_patch.json | 6 + 5 files changed, 237 insertions(+), 98 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index c2bcf69f0..5523863ff 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -1,10 +1,12 @@ + package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; -import eu.dnetlib.dhp.schema.oaf.Relation; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -15,101 +17,111 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; -import java.io.FileNotFoundException; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class PatchRelationsApplication { - private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Optional.ofNullable( - PatchRelationsApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) - .orElseThrow(FileNotFoundException::new) - )); - parser.parseArgument(args); + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional + .ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new))); + parser.parseArgument(args); - final Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String graphBasePath = parser.get("graphBasePath"); - log.info("graphBasePath: {}", graphBasePath); + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); - final String workingDir = parser.get("workingDir"); - log.info("workingDir: {}", workingDir); + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); - final String idMappingPath = parser.get("idMappingPath"); - log.info("idMappingPath: {}", idMappingPath); + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); - final SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); - } + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } - /** - * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the - * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. - * - * @param spark the SparkSession - * @param graphBasePath base graph path providing the set of relations to patch - * @param workingDir intermediate storage location - * @param idMappingPath dataset providing the old -> new identifier mapping - */ - private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, + final String idMappingPath) { - final String relationPath = graphBasePath + "/relation"; + final String relationPath = graphBasePath + "/relation"; - final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); - final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); - rels - .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setSource); - return r; - }, Encoders.bean(Relation.class)) - .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setTarget); - return r; - }, Encoders.bean(Relation.class)) - .map( - (MapFunction) OBJECT_MAPPER::writeValueAsString, - Encoders.STRING()) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(workingDir); + log.info("relations: {}", rels.count()); + log.info("idMapping: {}", idMapping.count()); - spark.read().textFile(workingDir) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(relationPath); - } + final Dataset bySource = rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)); + bySource + .joinWith(idMapping, bySource.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark + .read() + .textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java index f251da8c3..d5852ab70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -1,24 +1,25 @@ + package eu.dnetlib.dhp.oa.graph.raw.common; public class RelationIdMapping { - private String oldId; + private String oldId; - private String newId; + private String newId; - public String getOldId() { - return oldId; - } + public String getOldId() { + return oldId; + } - public void setOldId(final String oldId) { - this.oldId = oldId; - } + public void setOldId(final String oldId) { + this.oldId = oldId; + } - public String getNewId() { - return newId; - } + public String getNewId() { + return newId; + } - public void setNewId(final String newId) { - this.newId = newId; - } + public void setNewId(final String newId) { + this.newId = newId; + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java new file mode 100644 index 000000000..3fd365416 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -0,0 +1,115 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class PatchRelationApplicationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final String ID_MAPPING_PATH = "map/id_mapping.json"; + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(PatchRelationApplicationTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PatchRelationApplicationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PatchRelationApplicationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PatchRelationApplicationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("id_mapping.json"), + workingDir.resolve(ID_MAPPING_PATH).toFile()); + + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("relations_to_patch.json"), + workingDir.resolve("graphBasePath/relation/rels.json").toFile()); + + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testPatchRelationApplication() throws Exception { + + final String graphBasePath = workingDir.toString() + "/graphBasePath"; + PatchRelationsApplication.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphBasePath", graphBasePath, + "-workingDir", workingDir.toString() + "/workingDir", + "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + }); + + final List rels = spark + .read() + .textFile(graphBasePath + "/relation") + .map( + (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), + Encoders.bean(Relation.class)) + .collectAsList(); + + assertEquals(6, rels.size()); + + assertEquals(0, getCount(rels, "1a"), "should be patched to 1b"); + assertEquals(0, getCount(rels, "2a"), "should be patched to 2b"); + + assertEquals(2, getCount(rels, "10a"), "not included in patching"); + assertEquals(2, getCount(rels, "20a"), "not included in patching"); + + assertEquals(2, getCount(rels, "15a"), "not included in patching"); + assertEquals(2, getCount(rels, "25a"), "not included in patching"); + + assertEquals(2, getCount(rels, "1b"), "patched from 1a"); + assertEquals(2, getCount(rels, "2b"), "patched from 2a"); + } + + private long getCount(List rels, final String id) { + return rels.stream().filter(r -> r.getSource().equals(id) || r.getTarget().equals(id)).count(); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json new file mode 100644 index 000000000..640d042b1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json @@ -0,0 +1,5 @@ +{"oldId": "1a", "newId": "1b"} +{"oldId": "2a", "newId": "2b"} +{"oldId": "3a", "newId": "3b"} +{"oldId": "4a", "newId": "4b"} +{"oldId": "5a", "newId": "5b"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json new file mode 100644 index 000000000..31755c53d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json @@ -0,0 +1,6 @@ +{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From 11e26c020a85b874bec96e870cd9856c7a204000 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:54:13 +0200 Subject: [PATCH 112/287] Update 'README.md' --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39d4d98e4..0a0bd82ab 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ # dnet-hadoop -Dnet-hadoop is a tool for \ No newline at end of file +Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning. \ No newline at end of file From e244f73165a6b90a9b36686a0180685128695f8e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:54:13 +0200 Subject: [PATCH 113/287] Update 'README.md' --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39d4d98e4..0a0bd82ab 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ # dnet-hadoop -Dnet-hadoop is a tool for \ No newline at end of file +Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning. \ No newline at end of file From 613bd3bde0076113dd37a64d2efbc23ec8e9ec61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:54:45 +0200 Subject: [PATCH 114/287] Hosted By Map - refactor of the first attemp to prepare a new hosted by map dependent on the datasource in the graph and on two external sources: the gold list from unibi ad the doaj list of open access journal. Both the lists are downloaded from provided url parameter --- .../graph/hostedbymap/SparkProduceHostedByMap.scala | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index c44f2cbed..ca5e82a4a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -55,20 +55,7 @@ object SparkProduceHostedByMap { } - /** - * - def toHostedByMap(input: Map[String, HostedByItemType]): String = { - import org.json4s.jackson.Serialization - implicit val formats = org.json4s.DefaultFormats - - - - Serialization.write(input) - - - } - */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ From d8b9b0553b7c89b7831625dd3804384c9dab4e03 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:55:39 +0200 Subject: [PATCH 115/287] Hosted By Map - model classes to store the intermediate information to be used to apply the hosted by map --- .../hostedbymap/model/DatasourceInfo.java | 72 +++++++++++++++++++ .../graph/hostedbymap/model/EntityInfo.java | 69 ++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java new file mode 100644 index 000000000..f5ac8f70c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class DatasourceInfo implements Serializable { + private String id; + private String officialname; + private String issn; + private String eissn; + private String lissn; + private Boolean openAccess; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getOfficialname() { + return officialname; + } + + public void setOfficialname(String officialname) { + this.officialname = officialname; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getLissn() { + return lissn; + } + + public void setLissn(String lissn) { + this.lissn = lissn; + } + + public Boolean getOpenAccess() { + return openAccess; + } + + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; + } + + public static DatasourceInfo newInstance(String id, String officialname, String issn, String eissn, String lissn){ + DatasourceInfo di = new DatasourceInfo(); + di.id = id; + di.officialname = officialname; + di.issn = (issn != null) ? issn : "" ; + di.eissn = (eissn != null) ? eissn:""; + di.lissn = (lissn != null) ? lissn : ""; + di.openAccess = false; + + return di; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java new file mode 100644 index 000000000..7bf9d0598 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -0,0 +1,69 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class EntityInfo implements Serializable { + private String id; + private String journal_id; + private String name; + private Boolean openaccess; + private String hb_id; + + + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getJournal_id() { + return journal_id; + } + + public void setJournal_id(String journal_id) { + this.journal_id = journal_id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Boolean getOpenaccess() { + return openaccess; + } + + public void setOpenaccess(Boolean openaccess) { + this.openaccess = openaccess; + } + + public String getHb_id() { + return hb_id; + } + + public void setHb_id(String hb_id) { + this.hb_id = hb_id; + } + + public static EntityInfo newInstance(String id, String j_id, String name){ + return newInstance(id, j_id, name, false); + + } + + public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess){ + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journal_id = j_id; + pi.name = name; + pi.openaccess = openaccess; + pi.hb_id = ""; + return pi; + + } +} From 7c6ea2f4c700c3ce59dd78e5e4eed4ea8c198c5e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:56:27 +0200 Subject: [PATCH 116/287] Hosted By Map - first attempt for the creation of intermedia information to be used to applu the hosted by map on the graph entities --- .../oa/graph/hostedbymap/Aggregators.scala | 113 +++++++++----- .../SparkPrepareHostedByInfoToApply.scala | 147 ++++++++++++++++++ 2 files changed, 225 insertions(+), 35 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 6a9346ed5..8077efe30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,5 +1,6 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} import org.apache.spark.sql.expressions.Aggregator @@ -25,43 +26,10 @@ object Aggregators { } - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { val transformedData : Dataset[(String, HostedByItemType)] = df .groupByKey(_._1)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator1) + .agg(Aggregators.hostedByAggregator) .map{ case (id:String , res:(String, HostedByItemType)) => res }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) @@ -69,7 +37,7 @@ object Aggregators { transformedData } - val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { return merge(b, a) @@ -94,4 +62,79 @@ object Aggregators { override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) }.toColumn + def hostedByToSingleDSId(df: Dataset[ HostedByItemType]): Dataset[ HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByToDSAggregator) + .map{ + case (id:String , res: HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + def hostedByToDSAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.id.equals("")){ + return HostedByItemType(b1.id, b1.officialname, b1.issn, b1.eissn, b1.lissn, b1.openAccess || b2.openAccess) + + } + return HostedByItemType(b2.id, b2.officialname, b2.issn, b2.eissn, b2.lissn, b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + + def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHb_id.equals("")){ + b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + } + b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + def resultToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getId)(Encoders.STRING) + .agg(Aggregators.resultToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala new file mode 100644 index 000000000..03ab09d8e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -0,0 +1,147 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} +import eu.dnetlib.dhp.schema.oaf.{Datasource, Journal, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} + +object SparkPrepareHostedByInfoToApply { + + + implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.kryo[DatasourceInfo] + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.kryo[EntityInfo] + + def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { + var lst:List[EntityInfo] = List() + + + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst + } + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst + } + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst + } + lst + } + + def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + .map(r => mapper.readValue(r, classOf[Publication])) + + dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + + } + + + + def prepareDatasourceInfo(spark:SparkSession, datasourcePath:String) : Dataset[DatasourceInfo] = { + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.filter(d => d.getJournal != null ).map(d => DatasourceInfo.newInstance(d.getId, d.getOfficialname.getValue, + d.getJournal.getIssnPrinted, d.getJournal.getIssnOnline, d.getJournal.getIssnLinking)) + + } + def toHostedByItem(input:String): HostedByItemType = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + lazy val json: json4s.JValue = parse(input) + val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + c.values.head + } + + def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { + var lst : List[EntityInfo] = List() + if (input.getEissn != null && !input.getEissn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst + } + + lst + } + + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val hostedByMapPath = parser.get("hostedByMapPath") + + + implicit val formats = DefaultFormats + + + logger.info("Getting the Datasources") + + import spark.implicits._ + + //STEP1: leggere le DS e creare le entries {dsid, dsofficialname, issn, eissn, lissn, openaccess} + val datasourceInfoDataset: Dataset[DatasourceInfo] = prepareDatasourceInfo(spark, "$graphPath/datasource") + + //STEP2: leggere la hostedbymap e raggruppare per datasource id + val hostedByDataset = Aggregators.hostedByToSingleDSId(spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(toHostedByItem))) + + //STEP3: eseguire una join fra le datasource e la hostedby map (left) per settare se la datasource e' open access o no + //ed esplodere l'info della datasource per ogni journal id diverso da nullo + val join : Dataset[EntityInfo] = datasourceInfoDataset.joinWith(hostedByDataset, + datasourceInfoDataset.col("id").equalTo(hostedByDataset.col("id"), "left")) + .map(t2 => { + val dsi : DatasourceInfo = t2._1 + if(t2._2 != null){ + dsi.setOpenAccess(t2._2.openAccess) + } + dsi + }).flatMap(explodeJournalInfo) + + //STEP4: creare la mappa publication id issn, eissn, lissn esplosa + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") + + //STEP5: join di join con resultInfo sul journal_id dal result con left + // e riduzione di tutti i result con lo stesso id in una unica entry + Aggregators.resultToSingleId(resultInfoDataset.joinWith(join, resultInfoDataset.col("journal_id").equalTo(join.col("journal_id")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHb_id(ds.getId) + res.setOpenaccess(ds.getOpenaccess) + res.setName(ds.getName) + } + res + })).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + + + + } + +} From 1695d45bd41a772319d357642df536cd8634faf2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:57:01 +0200 Subject: [PATCH 117/287] Hosted By Map - Test class to verify the preparation of the intermediate information --- .../oa/graph/hostedbymap/TestPrepare.scala | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala new file mode 100644 index 000000000..f01e4f59f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DatasourceInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestPrepare extends java.io.Serializable{ + + @Test + def testPrepareDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + val ds :Dataset[DatasourceInfo]= SparkPrepareHostedByInfoToApply.prepareDatasourceInfo(spark, path) + + val mapper :ObjectMapper = new ObjectMapper() + + assertEquals(9, ds.count) + + assertEquals(8, ds.filter(hbi => !hbi.getIssn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.getEissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.getLissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.getIssn.equals("") && hbi.getEissn.equals("") && hbi.getLissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.getEissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getEissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getOfficialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getId.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.getId.startsWith("10|"))) + ds.foreach(e => println(mapper.writeValueAsString(e))) + spark.close() + } + + @Test + def testPrepareHostedByMap():Unit = { + + } + + +} From fd55c77d979dadb051ef37250f0c0f52022de757 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Aug 2021 13:48:42 +0200 Subject: [PATCH 118/287] updated dependency dhp-schemas:2.7.15 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fc4a8a21b..433c88093 100644 --- a/pom.xml +++ b/pom.xml @@ -741,7 +741,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.7.15] [4.0.3] [6.0.5] [3.1.6] From e826aae84811ec1143ddd7134ef11879846f043b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Aug 2021 14:28:59 +0200 Subject: [PATCH 119/287] using constants from ModelConstants --- .../eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala | 12 ++++++------ .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala index a19c6fb12..90b65c8f7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala @@ -199,7 +199,7 @@ object BioDBToOAF { d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) } val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version")) - .map(date => OafMapperUtils.structuredProperty(date.date, "UNKNOWN", "UNKNOWN", ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) + .map(date => OafMapperUtils.structuredProperty(date.date, ModelConstants.UNKNOWN, ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) if (relevant_dates != null && relevant_dates.nonEmpty) d.setRelevantdate(relevant_dates.asJava) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) @@ -218,12 +218,12 @@ object BioDBToOAF { if (references_pmid != null && references_pmid.nonEmpty) { - val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) rel.getCollectedfrom List(d, rel) } else if (references_doi != null && references_doi.nonEmpty) { - val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) List(d, rel) } else @@ -243,7 +243,7 @@ object BioDBToOAF { rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) rel.setDataInfo(DATA_INFO) - rel.setRelType("resultResult") + rel.setRelType(ModelConstants.RESULT_RESULT) rel.setSubRelType(subRelType) rel.setRelClass(relClass) @@ -263,7 +263,7 @@ object BioDBToOAF { def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date:String): Relation = { - createRelation(pid,pidType,sourceId,collectedFrom, "supplement","IsSupplementTo", date) + createRelation(pid,pidType,sourceId,collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) } @@ -392,6 +392,6 @@ object BioDBToOAF { i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date))) + List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, GraphCleaningFunctions.cleanDate(input.date))) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index ae4a72062..9a49deebc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -16,7 +16,7 @@ object PubMedToOaf { ) def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { - val result_typologies = getVocabularyTerm("dnet:result_typologies", vocabularies, cobjQualifier.getClassid) + val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { case "dataset" => new Dataset case "publication" => new Publication @@ -68,11 +68,11 @@ object PubMedToOaf { //else We have to find a terms that match the vocabulary otherwise we discard it val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) if (ja.isDefined) { - val cojbCategory = getVocabularyTerm("dnet:publication_resource", vocabularies, ja.get.getValue) + val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) i.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala - .map(s => getVocabularyTerm("dnet:publication_resource", vocabularies, s.getValue)) + .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .find(q => q != null) if (i_type.isDefined) i.setInstancetype(i_type.get) @@ -112,7 +112,7 @@ object PubMedToOaf { if (article.getLanguage != null) { - val term = vocabularies.getSynonymAsQualifier("dnet:languages", article.getLanguage) + val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) if (term != null) result.setLanguage(term) } From ff1ce75e33e0f7cb2293c50e40b7ff83dd89bedb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:32:59 +0200 Subject: [PATCH 120/287] Hosted By Map - modification in the code to prepare the info needed to apply the HostedByMap. There is no need to join datasources with the hbm: all the information needed is in the hosted by map already --- .../SparkPrepareHostedByInfoToApply.scala | 104 +++++++++--------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 03ab09d8e..7395b2450 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,7 +3,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} -import eu.dnetlib.dhp.schema.oaf.{Datasource, Journal, Publication} + +import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} @@ -12,11 +13,13 @@ import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} + + object SparkPrepareHostedByInfoToApply { - implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.kryo[DatasourceInfo] - implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.kryo[EntityInfo] + implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.bean(classOf[DatasourceInfo]) + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { var lst:List[EntityInfo] = List() @@ -47,25 +50,12 @@ object SparkPrepareHostedByInfoToApply { } - - def prepareDatasourceInfo(spark:SparkSession, datasourcePath:String) : Dataset[DatasourceInfo] = { - implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - - val mapper = new ObjectMapper() - - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) - .map(r => mapper.readValue(r, classOf[Datasource])) - - dd.filter(d => d.getJournal != null ).map(d => DatasourceInfo.newInstance(d.getId, d.getOfficialname.getValue, - d.getJournal.getIssnPrinted, d.getJournal.getIssnOnline, d.getJournal.getIssnLinking)) - - } - def toHostedByItem(input:String): HostedByItemType = { + def toEntityInfo(input:String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] - c.values.head + toEntityItem(c.keys.head, c.values.head) } def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { @@ -73,10 +63,49 @@ object SparkPrepareHostedByInfoToApply { if (input.getEissn != null && !input.getEissn.equals("")){ lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst } + if (input.getLissn != null && !input.getLissn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getLissn, input.getOfficialname, input.getOpenAccess) :: lst + } + if (input.getIssn != null && !input.getIssn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getIssn, input.getOfficialname, input.getOpenAccess) :: lst + } lst } + def joinDsandHBM(left:Dataset[DatasourceInfo], right:Dataset[HostedByItemType]): Dataset[EntityInfo] = { + left.joinWith(right, + left.col("id").equalTo(right.col("id")), "left") + .map(t2 => { + val dsi : DatasourceInfo = t2._1 + if(t2._2 != null){ + val hbi : HostedByItemType = t2._2 + dsi.setOpenAccess(hbi.openAccess) + } + dsi + }).flatMap(explodeJournalInfo) + } + + def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + + EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) + + } + + def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journal_id").equalTo(hbm.col("journal_id")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHb_id(ds.getId) + res.setOpenaccess(ds.getOpenaccess) + res.setName(ds.getName) + } + res + })) + } + def main(args: Array[String]): Unit = { @@ -105,43 +134,20 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere le DS e creare le entries {dsid, dsofficialname, issn, eissn, lissn, openaccess} - val datasourceInfoDataset: Dataset[DatasourceInfo] = prepareDatasourceInfo(spark, "$graphPath/datasource") - //STEP2: leggere la hostedbymap e raggruppare per datasource id - val hostedByDataset = Aggregators.hostedByToSingleDSId(spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(toHostedByItem))) + //STEP1: leggere la hostedbymap e trasformarla in entity info + val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP3: eseguire una join fra le datasource e la hostedby map (left) per settare se la datasource e' open access o no - //ed esplodere l'info della datasource per ogni journal id diverso da nullo - val join : Dataset[EntityInfo] = datasourceInfoDataset.joinWith(hostedByDataset, - datasourceInfoDataset.col("id").equalTo(hostedByDataset.col("id"), "left")) - .map(t2 => { - val dsi : DatasourceInfo = t2._1 - if(t2._2 != null){ - dsi.setOpenAccess(t2._2.openAccess) - } - dsi - }).flatMap(explodeJournalInfo) - - //STEP4: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: creare la mappa publication id issn, eissn, lissn esplosa val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") - //STEP5: join di join con resultInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry - Aggregators.resultToSingleId(resultInfoDataset.joinWith(join, resultInfoDataset.col("journal_id").equalTo(join.col("journal_id")), "left") - .map(t2 => { - val res: EntityInfo = t2._1 - if(t2._2 != null ){ - val ds = t2._2 - res.setHb_id(ds.getId) - res.setOpenaccess(ds.getOpenaccess) - res.setName(ds.getName) - } - res - })).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) - + //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left + // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + joinResHBM(resultInfoDataset, hostedByInfo) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) } + } From 72df8f92320c7d0ddcecece72a8e3502f5b03022 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:34:44 +0200 Subject: [PATCH 121/287] Hosted By Map - removed the aggregator for the datasource (it is no more needed) and added a new aggregator for the results. Changed also the hostedBYMap aggregator --- .../oa/graph/hostedbymap/Aggregators.scala | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 8077efe30..af4ac4507 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -62,42 +62,7 @@ object Aggregators { override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) }.toColumn - def hostedByToSingleDSId(df: Dataset[ HostedByItemType]): Dataset[ HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByToDSAggregator) - .map{ - case (id:String , res: HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - transformedData - } - - def hostedByToDSAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - if(!b1.id.equals("")){ - return HostedByItemType(b1.id, b1.officialname, b1.issn, b1.eissn, b1.lissn, b1.openAccess || b2.openAccess) - - } - return HostedByItemType(b2.id, b2.officialname, b2.issn, b2.eissn, b2.lissn, b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ @@ -115,6 +80,7 @@ object Aggregators { } if(!b1.getHb_id.equals("")){ b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + return b1 } b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) b2 From 1e859706a3516b8103342cbe875004a6c112d35b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:35:23 +0200 Subject: [PATCH 122/287] Hosted By Map - Classes to apply the HBM to results and datasources --- .../SparkApplyHostedByMapToDatasource.scala | 72 +++++++++++++++ .../SparkApplyHostedByMapToResult.scala | 88 +++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala new file mode 100644 index 000000000..4ecea63f3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +object SparkApplyHostedByMapToDatasource { + + def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { + dats.joinWith(join, dats.col("id").equalTo(join.col("hb_id")), "left") + .map(t2 => { + val d: Datasource = t2._1 + if (t2._2 != null) { + if (d.getOpenairecompatibility.getClassid.equals(ModelConstants.UNKNOWN)) { + d.getOpenairecompatibility.setClassid("hostedBy") + d.getOpenairecompatibility.setClassname("collected from a compatible aggregator") + } + } + d + })(Encoders.bean((classOf[Datasource]))) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val workingPath = parser.get("workingPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource") + .map(r => mapper.readValue(r, classOf[Datasource])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + + + //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario + + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource") + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala new file mode 100644 index 000000000..37afc319a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -0,0 +1,88 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ + +//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance +// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true + + +object SparkApplyHostedByMapToResult { + + def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { + pubs.joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") + .map(t2 => { + val p: Publication = t2._1 + if (t2._2 != null) { + val ei: EntityInfo = t2._2 + val i = p.getInstance().asScala + if (i.size == 1) { + val inst: Instance = i(0) + + inst.getHostedby.setKey(ei.getHb_id) + inst.getHostedby.setValue(ei.getName) + if (ei.getOpenaccess) { + inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + } + + } + } + p + })(Encoders.bean(classOf[Publication])) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val workingPath = parser.get("workingPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val pubs : Dataset[Publication] = spark.read.textFile("$graphPath/publication") + .map(r => mapper.readValue(r, classOf[Publication])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance + // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json("$graphPath/publication") + + + + } + + +} From 90e91486e2cc63a51e51f5e2412ad3ff7c49ba19 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:35:52 +0200 Subject: [PATCH 123/287] Hosted By Map - test class to verify each step in the preparation process --- .../oa/graph/hostedbymap/TestPrepare.scala | 142 +++++++++++++++--- 1 file changed, 124 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index f01e4f59f..efd598fef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -1,16 +1,31 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DatasourceInfo +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test class TestPrepare extends java.io.Serializable{ + def getString(input:HostedByItemType):String = { + + import org.json4s.jackson.Serialization.write + implicit val formats = DefaultFormats + + write(input) + } + + @Test - def testPrepareDatasource():Unit = { + def testHostedByMaptoEntityInfo() : Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -20,34 +35,125 @@ class TestPrepare extends java.io.Serializable{ .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("hostedbymap.json").getPath - val ds :Dataset[DatasourceInfo]= SparkPrepareHostedByInfoToApply.prepareDatasourceInfo(spark, path) - val mapper :ObjectMapper = new ObjectMapper() + import spark.implicits._ - assertEquals(9, ds.count) + val mapper:ObjectMapper = new ObjectMapper() - assertEquals(8, ds.filter(hbi => !hbi.getIssn.equals("")).count) - assertEquals(5, ds.filter(hbi => !hbi.getEissn.equals("")).count) - assertEquals(0, ds.filter(hbi => !hbi.getLissn.equals("")).count) + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - assertEquals(0, ds.filter(hbi => hbi.getIssn.equals("") && hbi.getEissn.equals("") && hbi.getLissn.equals("")).count) + val ds :Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365")).count == 1) - assertTrue(ds.filter(hbi => hbi.getEissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getEissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getOfficialname.equals("Thémata")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getId.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) - ds.foreach(hbi => assertTrue(hbi.getId.startsWith("10|"))) ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(20, ds.count) spark.close() } @Test - def testPrepareHostedByMap():Unit = { + def testPublicationtoEntityInfo() : Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("publication.json").getPath + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val ds :Dataset[EntityInfo] = prepareResultInfo(spark, path) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(2, ds.count) + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("0001-396X")).first().getId) + + spark.close() + } + + @Test + def testJoinResHBM (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("0001-396X", ei.getJournal_id) + assertEquals("Academic Therapy", ei.getName) + assertTrue(!ei.getOpenaccess) + + spark.close() + } + + @Test + def testJoinResHBM2 (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication2").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("Academic Therapy", ei.getName) + assertTrue(ei.getOpenaccess) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + spark.close() } + } From ee7ccb98dc716313b620dc47d02b1bb277632246 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:36:18 +0200 Subject: [PATCH 124/287] Hosted By Map - test class to verify the application of the hbm to results and datasource --- .../dhp/oa/graph/hostedbymap/TestApply.scala | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala new file mode 100644 index 000000000..b78eb978b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -0,0 +1,134 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestApply extends java.io.Serializable{ + + @Test + def testApplyOnResult (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("publication.json").getPath + val hbm = getClass.getResource("preparedInfo.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + + val pub_ds :Dataset[Publication] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(13, pub_ds.count()) + + val ds:Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) + + assertEquals(13, ds.count) + + val temp: Dataset[(Publication, Publication)] = pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(13, temp.count()) + temp.foreach(t2 => { + val pb : Publication = t2._1 + val pa : Publication = t2._2 + assertEquals(1, pa.getInstance().size()) + assertEquals(1, pb.getInstance().size()) + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")){ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735")) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + + + assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) + assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + + }else{ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals(pb.getInstance().get(0).getHostedby.getValue)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals(pb.getInstance().get(0).getAccessright.getClassid)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals(pb.getInstance().get(0).getAccessright.getClassname)) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb.getInstance().get(0).getAccessright.getOpenAccessRoute) + + } + }) + + spark.close() + } + + + @Test + def testApplyOnDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val dats = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("preparedInfo2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + + val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(10, dats_ds.count()) + + val ds:Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) + + assertEquals(10, ds.count) + + val temp: Dataset[(Datasource, Datasource)] = dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(10, temp.count()) + temp.foreach(t2 => { + val pb : Datasource = t2._1 + val pa : Datasource = t2._2 + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")){ + assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) + assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) + + assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) + + + }else{ + assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassid)) + + } + }) + + spark.close() + + } + +} From 17292c6641439f175b41e665422dea25a19d44f7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:37:08 +0200 Subject: [PATCH 125/287] Hosted By Map - resources for testing purposes --- .../dhp/oa/graph/hostedbymap/datasource.json | 4 ++-- .../oa/graph/hostedbymap/datasourceinfo.json | 9 +++++++++ .../graph/hostedbymap/hostedbyitemtype.json | 17 ++++++++++++++++ .../dhp/oa/graph/hostedbymap/hostedbymap.json | 20 +++++++++++++++++++ .../hostedbymap/iteminfofromhostedbymap.json | 20 +++++++++++++++++++ .../hostedbymap/iteminfofromhostedbymap2.json | 20 +++++++++++++++++++ .../graph/hostedbymap/iteminfofrompublication | 2 ++ .../hostedbymap/iteminfofrompublication2 | 2 ++ .../oa/graph/hostedbymap/preparedInfo.json | 1 + .../oa/graph/hostedbymap/preparedInfo2.json | 2 ++ .../dhp/oa/graph/hostedbymap/publication.json | 13 ++++++++++++ 11 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 4467c702f..5eb41bff5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,5 +1,5 @@ -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"UNKNOWN","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"native","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json new file mode 100644 index 000000000..fd51cc2f2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2077-3757","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json new file mode 100644 index 000000000..6e4c11f49 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json @@ -0,0 +1,17 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json new file mode 100644 index 000000000..188380bc6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json @@ -0,0 +1,20 @@ +{"0001-396X":{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false}} +{"0015-7899":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":false}} +{"1434-0860":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true}} +{"0022-0116":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"1573-3564":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"0022-0493":{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false}} +{"0022-4715":{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false}} +{"1543-8325":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0023-7639":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0033-3298":{"id":"10|issn___print::91899e3872351895467856daeb798f63","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false}} +{"0033-569X":{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false}} +{"0034-6691":{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false}} +{"0035-1776":{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false}} +{"0037-699X":{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true}} +{"0037-9697":{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false}} +{"0045-6713":{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false}} +{"0047-4800":{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false}} +{"0068-1024":{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false}} +{"0083-6656":{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false}} +{"0090-502X":{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json new file mode 100644 index 000000000..dfb95816e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"1434-0860","name":"Forschung im Ingenieurwesen","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json new file mode 100644 index 000000000..3d2f2e005 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication new file mode 100644 index 000000000..6ee4f94be --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1728-5852","name":"","openaccess":false,"hb_id":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 new file mode 100644 index 000000000..59bd32d4b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"","openaccess":false,"hb_id":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json new file mode 100644 index 000000000..d9c201082 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -0,0 +1 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json new file mode 100644 index 000000000..6e1cce3b6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -0,0 +1,2 @@ +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json new file mode 100644 index 000000000..602bedbda --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -0,0 +1,13 @@ +{"author":[{"fullname":"Shu, L.","name":"L.","pid":[],"rank":1,"surname":"Shu"},{"fullname":"Zhang, Y.","name":"Y.","pid":[],"rank":2,"surname":"Zhang"},{"fullname":"Chen, X.","name":"X.","pid":[],"rank":3,"surname":"Chen"},{"fullname":"Wang, S.","name":"S.","pid":[],"rank":4,"surname":"Wang"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"dateofcollection":"2021-07-10T12:27:03.175Z","dateoftransformation":"2021-07-10T12:38:22.255Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s11036-015-0594-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813784431,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-02-26T11:19:47Z","harvestDate":"2021-07-10T12:27:03.175Z","identifier":"oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Shu , L , Zhang , Y , Chen , X & Wang , S 2015 , ' Editorial for Special Issue on Industrial Networks and Intelligent Systems ' , Mobile Networks and Applications , vol. 20 , no. 2 , pp. 121-123 . https://doi.org/10.1007/s11036-015-0594-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial for Special Issue on Industrial Networks and Intelligent Systems"}]} +{"author":[{"fullname":"Hemmilä, Kari","name":"Kari","pid":[],"rank":1,"surname":"Hemmilä"},{"fullname":"Saarni, Risto","name":"Risto","pid":[],"rank":2,"surname":"Saarni"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"dateofcollection":"2021-07-10T12:31:21.927Z","dateoftransformation":"2021-07-10T13:17:02.345Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Millainen on onnistunut ikkunaremontti? Mikä on asukkaan kannalta paras vaihtoehto? Mitä seikkoja ikkunaremontissa on erityisesti otettava huomioon? Kirjan tekijät diplomi-insinööri Kari Hemmilä ja tekniikan lisensiaatti Risto Saarni ovat olleet mukana monissa ikkunoita ja niiden remontointia koskevissa tutkimusprojekteissa. Näiden lisäksi kirja perustuu lukuisista ikkunaremonteista saatuihin kokemuksiin, remonttien lähtökohtiin, toteutukseen sekä toimivuuden ja käyttökokemusten seurantaan. Onnistunut ikkunaremontti on sellainen, johon ollaan tyytyväisiä vielä vuosien päästä. Sen perustana on perehtyminen nykytilaan, huolellinen toteutus ja kunnossapito, joita tässä kirjassa tuodaan esille monesta näkökulmasta."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813851542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-08-11T09:10:31Z","harvestDate":"2021-07-10T12:31:21.927Z","identifier":"oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Rakennustieto oy"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hemmilä , K & Saarni , R 2002 , Ikkunaremontti . Rakennustieto oy , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ikkunaremontti"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Window renovation"}]} +{"author":[{"fullname":"Pavlic, Davor","name":"Davor","pid":[],"rank":1,"surname":"Pavlic"},{"fullname":"Pulkkinen, Antti","name":"Antti","pid":[],"rank":2,"surname":"Pulkkinen"},{"fullname":"Riitahuhta, Asko","name":"Asko","pid":[],"rank":3,"surname":"Riitahuhta"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"2021-07-10T12:34:48.501Z","dateoftransformation":"2021-07-10T13:26:46.605Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813869068,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-26T12:03:33Z","harvestDate":"2021-07-10T12:34:48.501Z","identifier":"oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Design Society"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pavlic , D , Pulkkinen , A & Riitahuhta , A 2006 , A conceptual framework of product family architecture . in DS 47: Proceedings of NordDesign 2006 Conference, Rejkjavik, Iceland, 16.-18.08.2006 . Design Society , pp. 212-222 . < https://www.designsociety.org/publication/24280/A+Conceptual+Framework+of+Product+Family+Architecture >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A conceptual framework of product family architecture"}]} +{"author":[{"fullname":"Carpen, Leena","name":"Leena","pid":[],"rank":1,"surname":"Carpen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.079Z","dateoftransformation":"2021-07-10T13:36:20.24Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/1f27e75d-365e-4810-be48-ef60378d8279"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813880572,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-05-17T10:37:19Z","harvestDate":"2021-07-10T12:27:29.079Z","identifier":"oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Carpen , L 1999 , ' Case: microbial induced corrosion in paper machines ' , RenhetsTeknik , no. 2 , 16 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Case: microbial induced corrosion in paper machines"}]} +{"author":[{"fullname":"Tuomola, Tuomas","name":"Tuomas","pid":[],"rank":1,"surname":"Tuomola"},{"fullname":"Siitonen, Veijo","name":"Veijo","pid":[],"rank":2,"surname":"Siitonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"dateofcollection":"2021-07-10T12:30:02.598Z","dateoftransformation":"2021-07-10T13:50:33.549Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/56ed9ab2-3b3c-4829-a63b-52311969dd30"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813901871,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-27T09:19:12Z","harvestDate":"2021-07-10T12:30:02.598Z","identifier":"oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tuomola , T & Siitonen , V 1970 , Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968 . Valtion teknillinen tutkimuslaitos: Lämpöteknillinen laboratorio. Tiedonanto , no. 4 , VTT Technical Research Centre of Finland , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968"}]} +{"author":[{"fullname":"Paakkari, Jussi","name":"Jussi","pid":[],"rank":1,"surname":"Paakkari"},{"fullname":"Ailisto, Heikki","name":"Heikki","pid":[],"rank":2,"surname":"Ailisto"},{"fullname":"Niskala, Matti","name":"Matti","pid":[],"rank":3,"surname":"Niskala"},{"fullname":"Mäkäräinen, Masa","name":"Masa","pid":[],"rank":4,"surname":"Mäkäräinen"},{"fullname":"Väinämö, Kauko","name":"Kauko","pid":[],"rank":5,"surname":"Väinämö"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.082Z","dateoftransformation":"2021-07-10T13:56:12.148Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/33cedb13-48f3-4326-b5c3-18d248374378"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813911302,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-29T02:07:50Z","harvestDate":"2021-07-10T12:27:29.082Z","identifier":"oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Research and Development Centre of Kajaani"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Paakkari , J , Ailisto , H , Niskala , M , Mäkäräinen , M & Väinämö , K 1999 , Machine vision guided waterjet cutting . in 3rd International Symposium on Optics in Engineering. Kajaani, FI, 19 - 21 Jan. 1999 . Research and Development Centre of Kajaani , Kajaani ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Machine vision guided waterjet cutting"}]} +{"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} +{"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} +{"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . < https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} +{"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} +{"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file From 327cddde336a2a590efeba3a76febb1cec36711b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 10:44:13 +0200 Subject: [PATCH 126/287] Hosted By Map - refactoring --- .../graph/hostedbymap/model/EntityInfo.java | 97 +++++++++---------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java index 7bf9d0598..1facaa792 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -1,69 +1,68 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap.model; import java.io.Serializable; public class EntityInfo implements Serializable { - private String id; - private String journal_id; - private String name; - private Boolean openaccess; - private String hb_id; + private String id; + private String journal_id; + private String name; + private Boolean openaccess; + private String hb_id; + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } - public String getId() { - return id; - } + public String getJournal_id() { + return journal_id; + } - public void setId(String id) { - this.id = id; - } + public void setJournal_id(String journal_id) { + this.journal_id = journal_id; + } - public String getJournal_id() { - return journal_id; - } + public String getName() { + return name; + } - public void setJournal_id(String journal_id) { - this.journal_id = journal_id; - } + public void setName(String name) { + this.name = name; + } - public String getName() { - return name; - } + public Boolean getOpenaccess() { + return openaccess; + } - public void setName(String name) { - this.name = name; - } + public void setOpenaccess(Boolean openaccess) { + this.openaccess = openaccess; + } - public Boolean getOpenaccess() { - return openaccess; - } + public String getHb_id() { + return hb_id; + } - public void setOpenaccess(Boolean openaccess) { - this.openaccess = openaccess; - } + public void setHb_id(String hb_id) { + this.hb_id = hb_id; + } - public String getHb_id() { - return hb_id; - } + public static EntityInfo newInstance(String id, String j_id, String name) { + return newInstance(id, j_id, name, false); - public void setHb_id(String hb_id) { - this.hb_id = hb_id; - } + } - public static EntityInfo newInstance(String id, String j_id, String name){ - return newInstance(id, j_id, name, false); + public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journal_id = j_id; + pi.name = name; + pi.openaccess = openaccess; + pi.hb_id = ""; + return pi; - } - - public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess){ - EntityInfo pi = new EntityInfo(); - pi.id = id; - pi.journal_id = j_id; - pi.name = name; - pi.openaccess = openaccess; - pi.hb_id = ""; - return pi; - - } + } } From 461b8a29a0e52d92cb12aaec7e481844f9124c51 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 10:46:51 +0200 Subject: [PATCH 127/287] removed not needed class --- .../hostedbymap/model/DatasourceInfo.java | 72 ------------------- 1 file changed, 72 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java deleted file mode 100644 index f5ac8f70c..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java +++ /dev/null @@ -1,72 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostedbymap.model; - -import java.io.Serializable; - -public class DatasourceInfo implements Serializable { - private String id; - private String officialname; - private String issn; - private String eissn; - private String lissn; - private Boolean openAccess; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getOfficialname() { - return officialname; - } - - public void setOfficialname(String officialname) { - this.officialname = officialname; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getLissn() { - return lissn; - } - - public void setLissn(String lissn) { - this.lissn = lissn; - } - - public Boolean getOpenAccess() { - return openAccess; - } - - public void setOpenAccess(Boolean openAccess) { - this.openAccess = openAccess; - } - - public static DatasourceInfo newInstance(String id, String officialname, String issn, String eissn, String lissn){ - DatasourceInfo di = new DatasourceInfo(); - di.id = id; - di.officialname = officialname; - di.issn = (issn != null) ? issn : "" ; - di.eissn = (eissn != null) ? eissn:""; - di.lissn = (lissn != null) ? lissn : ""; - di.openAccess = false; - - return di; - } -} From 9831725073e21ed89e014072c92cc0fc57642cee Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 11:02:17 +0200 Subject: [PATCH 128/287] Hosted By Map - remove from workflow a step not needed. The hbm will be take care also of the integration of the unibi list of gold openaccess journals --- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 4de1a2185..de433b038 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,7 +63,6 @@ - ${wf:conf('resumeFrom') eq 'DownloadGoldIssn'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} @@ -77,18 +76,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- - - eu.dnetlib.doiboost.GetCSV - --hdfsNameNode${nameNode} - --fileURL${unibiGoldIssnFileURL} - --hdfsPath${hdfsPath} - --classForNameeu.dnetlib.doiboost.UnibiGoldModel - - - - - From a7bf314fd2a8c0fb1075eb2738023e7d802d0364 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:13:30 +0200 Subject: [PATCH 129/287] Hosted By Map - added new aggregator to get just one result per datasource id --- .../oa/graph/hostedbymap/Aggregators.scala | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index af4ac4507..8198b197a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -103,4 +103,40 @@ object Aggregators { transformedData } + def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHb_id.equals("")){ + return b1 + } + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + + def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getHb_id)(Encoders.STRING) + .agg(Aggregators.datasourceToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } } From 3fc820203bcd76bf4aa81baa027cd76bee7c0460 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 4 Aug 2021 10:13:48 +0200 Subject: [PATCH 130/287] fixed wrong test file --- .../eu/dnetlib/dhp/sx/graph/oaf_to_summary | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary index 4d581044a..e1fd758b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary @@ -1,20 +1,10 @@ -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","target":"50|doi_________::4af011e641e0ba286660fd24a3f603b7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-01-01","dataInfo":null}]} -{"id":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","localIdentifier":[{"identifier":"10.1111/1346-8138.14162","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/1346-8138.14162/fullpdf"}],"typology":"publication","subType":"Article","title":["Guideline of SSc","Diagnostic criteria, severity classification and guidelines of systemic sclerosis"],"author":["Yoshihide Asano","Masatoshi Jinnin","Yasushi Kawaguchi","Masataka Kuwana","Daisuke Goto","Shinichi Sato","Kazuhiko Takehara","Masaru Hatano","Manabu Fujimoto","Naoki Mugii","Hironobu Ihn"],"date":["2018-04-23","2018-01-01"],"subject":null,"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","target":"50|doi_________::7e79063f205480e61ee7fdcf7ab03bad","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2004-11-01","dataInfo":null}]} -{"id":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","localIdentifier":[{"identifier":"10.1002/ajmg.a.30270","schema":"doi","url":"https://onlinelibrary.wiley.com/doi/full/10.1002/ajmg.a.30270"}],"typology":"publication","subType":"Article","title":["Clinical variability in a Noonan syndrome family with a newPTPN11 gene mutation"],"author":["D�bora Romeo Bertola","Alexandre C. Pereira","Paulo S.L. de Oliveira","Chong A. Kim","Jos� Eduardo Krieger"],"date":["2004-09-21T23:19:41Z","2004-11-01"],"subject":[{"scheme":"keywords","value":"Genetics(clinical)"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","target":"50|doi_________::edb21431e0271061e0dddc248300708a","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2010-08-16","dataInfo":null}]} -{"id":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","localIdentifier":[{"identifier":"10.1111/j.1440-1843.2010.01819.x","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/j.1440-1843.2010.01819.x/fullpdf"}],"typology":"publication","subType":"Article","title":["P. aeruginosa: host defence in the lung","Pseudomonas aeruginosa: Host defence in lung diseases"],"author":["Bryan J. WILLIAMS","Joanne DEHNBOSTEL","Timothy S. BLACKWELL"],"date":["2010-08-16"],"subject":[{"scheme":"keywords","value":"Pulmonary and Respiratory Medicine"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f441c6243fd6ae381c520b42349b769","target":"50|doi_________::44ebec98169daae57c106eb1a1072aae","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-07-12","dataInfo":null}]} -{"id":"50|doi_________::3f441c6243fd6ae381c520b42349b769","localIdentifier":[{"identifier":"10.1007/s11901-018-0414-x","schema":"doi","url":"http://link.springer.com/article/10.1007/s11901-018-0414-x/fulltext.html"}],"typology":"publication","subType":"Article","title":["DILI Associated with Skin Reactions"],"author":["Sahand Rahnama-Moghadam","Hans L. Tillmann"],"date":["2018-07-12"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::ff99f7ec03946fa4c8f413d59f75a547","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::f767374d588a8d51de0f129261daa5a7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","target":"50|doi_________::e2d40a313240d3eb979a3172103a4d7f","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2006-11-17","dataInfo":null}]} -{"id":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","localIdentifier":[{"identifier":"10.1007/s11096-006-9043-5","schema":"doi","url":"https://dx.doi.org/10.1007/s11096-006-9043-5"}],"typology":"publication","subType":"Article","title":["Patients’ attitudes towards and experiences of generic drug substitution in Norway"],"author":["Kjoenniksen, Inge","Lindbaek, Morten","Granas, Anne Gerd"],"date":["2006-11-17"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","target":"50|doi_________::fa0760d1427b71b6cb3ffcc739751197","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-09-29","dataInfo":null}]} -{"id":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","localIdentifier":[{"identifier":"10.1080/10408398509527417","schema":"doi","url":"http://www.tandfonline.com/doi/pdf/10.1080/10408398509527417"}],"typology":"publication","subType":"Article","title":["The genusallium. Part 2"],"author":["Gruffydd R. Fenwick","Anthony B. Hanley","John R. Whitaker"],"date":["2009-09-30T13:53:43Z","2009-09-29"],"subject":[{"scheme":"keywords","value":"General Medicine"}],"publisher":["Informa UK Limited"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","target":"50|pmid________::43fb246d61ba89b7f9825d9e02856d17","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-01-01","dataInfo":null}]} -{"id":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","localIdentifier":[{"identifier":"10.1111/j.1742-481x.2009.00634.x","schema":"doi","url":"https://dx.doi.org/10.1111/j.1742-481x.2009.00634.x"}],"typology":"publication","subType":"Article","title":["Venous leg ulcers: patient concordance with compression therapy and its impact on healing and prevention of recurrence"],"author":["Moffatt, Christine","Kommala, Dheerendra","Dourdin, Nathalie","Choe, Yoonhee"],"date":["2009-01-01","2009-11-13T10:40:02Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","target":"50|doi_________::1d47307b88d6bb6757f71bfc56686b74","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2015-01-01","dataInfo":null}]} -{"id":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","localIdentifier":[{"identifier":"10.1111/jocd.12148","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/jocd.12148/fullpdf"}],"typology":"publication","subType":"Article","title":["Assessment of efficacy and tolerability of different concentrations of trichloroacetic acid vs\n. carbon dioxide laser in treatment of xanthelasma palpebrarum"],"author":["Basma Mourad","Lamia H. Elgarhy","Heba-Alla Ellakkawy","Nageh Elmahdy"],"date":["2015-08-07","2015-01-01"],"subject":[{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1602017035423,"id":"50|doi_________::002d84a55111d3d23a3ef388f8f31ad5","originalId":["10.1007/s10956-019-9769-1","9769","50|doiboost____::002d84a55111d3d23a3ef388f8f31ad5"],"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-07-29T19:20:00+0200","dateoftransformation":"1970-01-19T12:20:43+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yeh, Heng-Yi","name":"Heng-Yi","surname":"Yeh","rank":1,"pid":null,"affiliation":[]},{"fullname":"Tsai, Yu-Hsiang","name":"Yu-Hsiang","surname":"Tsai","rank":2,"pid":null,"affiliation":[]},{"fullname":"Tsai, Chin-Chung","name":"Chin-Chung","surname":"Tsai","rank":3,"pid":null,"affiliation":[]},{"fullname":"Chang, Hsin-Yi","name":"Hsin-Yi","surname":"Chang","rank":4,"pid":[{"value":"https://orcid.org/0000-0002-9659-1022","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Education","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Investigating Students’ Conceptions of Technology-Assisted Science Learning: a Drawing Analysis","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-31","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-07-01","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31T13:04:54Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1007/s10956-019-9769-1.pdf","http://link.springer.com/article/10.1007/s10956-019-9769-1/fulltext.html","http://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1596054283851,"id":"50|doi_________::0035704f67a6e839f786b7390c31106e","originalId":["990","10.1186/1471-2377-14-81","50|doiboost____::0035704f67a6e839f786b7390c31106e"],"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-07-29T20:24:43Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Florien W Boele","name":"Florien W","surname":"Boele","rank":1,"pid":null,"affiliation":null},{"fullname":"Irma M Verdonck-de Leeuw","name":"Irma M","surname":"Verdonck-de Leeuw","rank":2,"pid":null,"affiliation":null},{"fullname":"Pim Cuijpers","name":"Pim","surname":"Cuijpers","rank":3,"pid":null,"affiliation":null},{"fullname":"Jaap C Reijneveld","name":"Jaap C","surname":"Reijneveld","rank":4,"pid":null,"affiliation":null},{"fullname":"Jan J Heimans","name":"Jan J","surname":"Heimans","rank":5,"pid":null,"affiliation":null},{"fullname":"Martin Klein","name":"Martin","surname":"Klein","rank":6,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Clinical Neurology","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Medicine","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Internet-based guided self-help for glioma patients with depressive symptoms: design of a randomized controlled trial","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2014-04-10T19:01:28Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-23","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1186/1471-2377-14-81.pdf","http://link.springer.com/article/10.1186/1471-2377-14-81/fulltext.html","http://link.springer.com/content/pdf/10.1186/1471-2377-14-81","http://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"BMC Neurology","issnPrinted":null,"issnOnline":"1471-2377","issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":"14","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619631198141,"id":"50|doi_________::003bc91a6b4c1565813dfdd522697b1a","originalId":["10.1039/c8tc05911j","50|doiboost____::003bc91a6b4c1565813dfdd522697b1a"],"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-04-28T17:33:18Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yuqin Li","name":"Yuqin","surname":"Li","rank":1,"pid":null,"affiliation":null},{"fullname":"Siming Gao","name":"Siming","surname":"Gao","rank":2,"pid":null,"affiliation":null},{"fullname":"Nan Zhang","name":"Nan","surname":"Zhang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xin Huang","name":"Xin","surname":"Huang","rank":4,"pid":null,"affiliation":null},{"fullname":"Jinchang Tian","name":"Jinchang","surname":"Tian","rank":5,"pid":null,"affiliation":null},{"fullname":"Feng Xu","name":"Feng","surname":"Xu","rank":6,"pid":null,"affiliation":null},{"fullname":"Zhizhong Sun","name":"Zhizhong","surname":"Sun","rank":7,"pid":null,"affiliation":null},{"fullname":"Shougen Yin","name":"Shougen","surname":"Yin","rank":8,"pid":null,"affiliation":null},{"fullname":"Xiaoming Wu","name":"Xiaoming","surname":"Wu","rank":9,"pid":null,"affiliation":null},{"fullname":"Wenyi Chu","name":"Wenyi","surname":"Chu","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-4926-4475","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Materials Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Solution-processable, high luminance deep-blue organic light emitting devices based on novel naphthalene bridged bis-triphenylamine derivatives","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-29T03:05:29Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-02-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 .","dataInfo":null},{"value":"A series of naphthalene bridged bis-triphenylamine derivatives with a twisted structure was designed, synthesized and characterized. The dependence of their thermal, photophysical and electrochemical properties and performance as emitters in OLEDs on their chemical structure was systematically studied by the introduction of aryl groups with electron-donating or electron-withdrawing substituents to the naphthalene bridged bis-triphenylamine core. These compounds exhibited steady blue light emissions and high d values ranging from 468 to 500 °C. Most importantly, the deep-blue OLEDs were successfully fabricated using a solution processed method by blending PVK and PBD to improve OLED performance. The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 with CIE coordinates of (0.166, 0.097). These results indicated that these compounds with the twisted naphthalene bridged bis-triphenylamine core could show stable deep-blue electroluminescence properties and introducing the electron-donating group (–OCH 3 ) could enable high luminance and current efficiency for OLEDs.","dataInfo":null}],"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"publisher":{"value":"Royal Society of Chemistry (RSC)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://rsc.li/journals-terms-of-use","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://pubs.rsc.org/en/content/articlepdf/2019/TC/C8TC05911J","http://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Materials Chemistry C","issnPrinted":"2050-7526","issnOnline":"2050-7534","issnLinking":null,"ep":"2698","iss":null,"sp":"2686","vol":"7","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1611897051298,"id":"50|doi_________::0048eb8bb3f289cccc9358bf3726a772","originalId":["10.1016/s1441-3582(03)70132-3","10.1016/S1441-3582(03)70132-3","50|doiboost____::0048eb8bb3f289cccc9358bf3726a772"],"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-03-12T11:12:27+0100","dateoftransformation":"1970-01-19T09:00:07+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Ling, Bith-Hong","name":"Bith-Hong","surname":"Ling","rank":1,"pid":null,"affiliation":[]},{"fullname":"Lockshin, Larry","name":"Larry","surname":"Lockshin","rank":2,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Components of Wine Prices for Australian Wine: How Winery Reputation, Wine Quality, Region, Vintage, and Winery Size Contribute to the Price of Varietal Wines","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2003-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2018-12-09","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-07-07T08:52:15Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-01-27","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":" Australian wines are identified by the varietal names of the grapes rather than the regions as in France and other traditional wine producing countries. This paper uses the concept of hedonic price theory to investigate a range of extrinsic characteristics’ ability to predict prices for different climate regions (warm and cool) and four major wine varieties of Australian wines, two reds (shiraz and cabernet) and two whites (chardonnay and riesling). The effects of winery reputation (wine company/brand), winery size (production scale), age of the wine, and region of origin (wine grape source) contributing to the relationship between price and quality attributes of Australian wines are investigated, based on 1880 observations of bottled wines. Wine quality rating and winery/brand reputation have major effects on the price, while region and size of winery have differential effects depending on the variety of grape. Vintage has only a minor effect. ","dataInfo":null}],"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://journals.sagepub.com/page/policies/text-and-data-mining-license","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/plain","http://journals.sagepub.com/doi/pdf/10.1016/S1441-3582%2803%2970132-3","http://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-07-07T08:52:15Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1613672628083,"id":"50|doi_________::005ca383dbcbecb839c1c4f525636048","originalId":["10.1080/01431161.2017.1302106","50|doiboost____::005ca383dbcbecb839c1c4f525636048"],"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-02-18T18:23:48Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Jun Wang","name":"Jun","surname":"Wang","rank":1,"pid":null,"affiliation":null},{"fullname":"Jinye Peng","name":"Jinye","surname":"Peng","rank":2,"pid":null,"affiliation":null},{"fullname":"Xiaoyue Jiang","name":"Xiaoyue","surname":"Jiang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xiaoyi Feng","name":"Xiaoyi","surname":"Feng","rank":4,"pid":null,"affiliation":null},{"fullname":"Jianhong Zhou","name":"Jianhong","surname":"Zhou","rank":5,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Earth and Planetary Sciences","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Remote-sensing image fusion using sparse representation with sub-dictionaries","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-03-24T08:21:49Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-18","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"publisher":{"value":"Informa UK Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.tandfonline.com/doi/pdf/10.1080/01431161.2017.1302106","http://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"International Journal of Remote Sensing","issnPrinted":"0143-1161","issnOnline":"1366-5901","issnLinking":null,"ep":"3585","iss":null,"sp":"3564","vol":"38","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619364899333,"id":"50|doi_________::006a8e00482f03066c79b472dfe51ba3","originalId":["10.1515/bot-2019-0045","50|doiboost____::006a8e00482f03066c79b472dfe51ba3"],"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-02-08T18:30:38+0100","dateoftransformation":"1970-01-19T08:13:03+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"den Hartog, Cornelis","name":"Cornelis","surname":"den Hartog","rank":1,"pid":null,"affiliation":[]},{"fullname":"Triest, Ludwig","name":"Ludwig","surname":"Triest","rank":2,"pid":[{"value":"https://orcid.org/0000-0002-4946-9614","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Plant Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Aquatic Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ecology, Evolution, Behavior and Systematics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"A profound view and discourse on the typification and status of three confused taxa: Ruppia maritima, R. spiralis and R. cirrhosa","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2020-01-15","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15T09:02:47Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-06-25","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractTaxonomic difficulties have persisted within the genus Ruppia for a long time. We first unravel misconceptions as perceived on different continents and subsequently present a revised interpretation of the identity and typification of three European taxa at species level: Ruppia maritima L., Ruppia spiralis L. ex Dumortier, and Ruppia cirrhosa (Petagna) Grande. To do this, historical specimens, illustrations and original descriptions were studied. We supersede a previous choice of the figure of Buccaferrea maritima, foliis minus acutis Micheli (1729) as the lectotype of R. maritima and type species of the genus Ruppia owing to a serious conflict with the protologue. Based on a meticulous interpretation of protologues and figures in a historical context, we reject the recent view of assigning R. cirrhosa and its proposed lectotype (iconotype) as a homotypic synonym of R. maritima. We agree with an earlier lectotypification of R. spiralis, though for another reason than the above-mentioned abused homotypy. Consequently, R. cirrhosa is a synonym of neither R. maritima or R. spiralis, based on material from Petagna in the Herbarium of Naples designated as the holotype of R. cirrhosa. We argue for three species to be considered as fully independent taxa: R. maritima, R. spiralis and R. cirrhosa.","dataInfo":null}],"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"publisher":{"value":"Walter de Gruyter GmbH","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.degruyter.com/view/journals/botm/63/3/article-p229.xml","https://www.degruyter.com/document/doi/10.1515/bot-2019-0045/pdf","http://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-06-25","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1588058636007,"id":"50|doi_________::00a55b7fae8de31a2ffa4e235e98a7bf","originalId":["10.1109/tns.2004.835620","50|doiboost____::00a55b7fae8de31a2ffa4e235e98a7bf"],"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-04-30T20:50:59+0200","dateoftransformation":"1970-01-19T10:11:12+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Veloso, J.F.C.A.","name":"J.F.C.A.","surname":"Veloso","rank":1,"pid":null,"affiliation":[]},{"fullname":"Amaro, F.","name":"F.","surname":"Amaro","rank":2,"pid":null,"affiliation":[]},{"fullname":"dos Santos, J.M.F.","name":"J.M.F.","surname":"dos Santos","rank":3,"pid":null,"affiliation":[]},{"fullname":"Mir, J.A.","name":"J.A.","surname":"Mir","rank":4,"pid":null,"affiliation":[]},{"fullname":"Derbyshire, G.E.","name":"G.E.","surname":"Derbyshire","rank":5,"pid":null,"affiliation":[]},{"fullname":"Stephenson, R.","name":"R.","surname":"Stephenson","rank":6,"pid":null,"affiliation":[]},{"fullname":"Rhodes, N.J.","name":"N.J.","surname":"Rhodes","rank":7,"pid":null,"affiliation":[]},{"fullname":"Schooneveld, E.M.","name":"E.M.","surname":"Schooneveld","rank":8,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Nuclear and High Energy Physics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Electrical and Electronic Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Nuclear Energy and Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Application of the microhole and strip plate detector for neutron detection","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2004-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-14","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2004-10-19T08:20:44Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"publisher":{"value":"Institute of Electrical and Electronics Engineers (IEEE)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/23/29603/01344292.pdf?arnumber=1344292","http://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-10-19T08:20:44Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1592299453363,"id":"50|doi_________::00ad40afc28e0c860116ac595183a0c0","originalId":["S0377221797004529","10.1016/s0377-2217(97)00452-9","50|doiboost____::00ad40afc28e0c860116ac595183a0c0"],"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-06-16T09:24:13Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Chung-Yee Lee","name":"Chung-Yee","surname":"Lee","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Two-machine flowshop scheduling with availability constraints","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2002-07-25T17:48:22Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"1999-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-04-24","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"https://www.elsevier.com/tdm/userlicense/1.0/","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/plain","http://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"1999-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"European Journal of Operational Research","issnPrinted":"0377-2217","issnOnline":null,"issnLinking":null,"ep":"429","iss":null,"sp":"420","vol":"114","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1609910911587,"id":"50|doi_________::00d8413c6d6f5c091ab6dda88f0a0ecb","originalId":["10.1175/jpo-d-16-0281.1","50|doiboost____::00d8413c6d6f5c091ab6dda88f0a0ecb"],"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-01-06T05:28:31Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"P. B. Smit","name":"P. B.","surname":"Smit","rank":1,"pid":null,"affiliation":null},{"fullname":"T. T. Janssen","name":"T. T.","surname":"Janssen","rank":2,"pid":null,"affiliation":null},{"fullname":"T. H. C. Herbers","name":"T. H. C.","surname":"Herbers","rank":3,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Oceanography","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Nonlinear Wave Kinematics near the Ocean Surface","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-05-09T19:33:59Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-11-16","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractEstimation of second-order, near-surface wave kinematics is important for interpretation of ocean surface remote sensing and surface-following instruments, determining loading on offshore structures, and understanding of upper-ocean transport processes. Unfortunately, conventional wave theories based on Stokes-type expansions do not consider fluid motions at levels above the unperturbed fluid level. The usual practice of extrapolating the fluid kinematics from the unperturbed free surface to higher points in the fluid is generally reasonable for narrowband waves, but for broadband ocean waves this results in dramatic (and nonphysical) overestimation of surface velocities. Consequently, practical approximations for random waves are at best empirical and are often only loosely constrained by physical principles. In the present work, the authors formulate the governing equations for water waves in an incompressible and inviscid fluid, using a boundary-fitted coordinate system (i.e., sigma or s coordinates) to derive expressions for near-surface kinematics in nonlinear random waves from first principles. Comparison to a numerical model valid for highly nonlinear waves shows that the new results 1) are consistent with second-order Stokes theory, 2) are similar to extrapolation methods in narrowband waves, and 3) greatly improve estimates of surface kinematics in random seas.","dataInfo":null}],"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"publisher":{"value":"American Meteorological Society","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://journals.ametsoc.org/view/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://journals.ametsoc.org/jpo/article-pdf/47/7/1657/4810788/jpo-d-16-0281_1.pdf","https://journals.ametsoc.org/downloadpdf/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Physical Oceanography","issnPrinted":"0022-3670","issnOnline":"1520-0485","issnLinking":null,"ep":"1673","iss":null,"sp":"1657","vol":"47","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1582142606255,"id":"50|doi_________::00df77889b1ee0af015524c627f0cf47","originalId":["10.1109/isscc.2010.5433999","50|doiboost____::00df77889b1ee0af015524c627f0cf47"],"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-02-19T20:03:26Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Sameh A Ibrahim","name":"Sameh A","surname":"Ibrahim","rank":1,"pid":null,"affiliation":null},{"fullname":"Behzad Razavi","name":"Behzad","surname":"Razavi","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"A 20Gb/s 40mW equalizer in 90nm CMOS technology","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2010-03-24T14:35:14Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-19","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"publisher":{"value":"IEEE","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0004","classname":"0004","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/5428240/5433812/05433999.pdf?arnumber=5433999","http://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"2010 IEEE International Solid-State Circuits Conference - (ISSCC)","issnPrinted":null,"issnOnline":null,"issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":null,"edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file From 8f7623e77afe2edce3681a81a0283f007f733c9d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:14:20 +0200 Subject: [PATCH 131/287] Hosted By Map - refactoring and application of the new aggregator --- .../SparkApplyHostedByMapToDatasource.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 4ecea63f3..fad313f1c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -45,7 +45,7 @@ object SparkApplyHostedByMapToDatasource { val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") - val workingPath = parser.get("workingPath") + val preparedInfoPath = parser.get("preparedInfoPath") implicit val formats = DefaultFormats @@ -55,17 +55,15 @@ object SparkApplyHostedByMapToDatasource { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource") + val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - - + val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource") + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } From 8ba8c77f92a82104076649fb3ba4fa80d13fd449 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:14:57 +0200 Subject: [PATCH 132/287] Hosted By Map - refactoring --- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 37afc319a..312513285 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -29,7 +29,6 @@ object SparkApplyHostedByMapToResult { val i = p.getInstance().asScala if (i.size == 1) { val inst: Instance = i(0) - inst.getHostedby.setKey(ei.getHb_id) inst.getHostedby.setValue(ei.getName) if (ei.getOpenaccess) { @@ -60,7 +59,7 @@ object SparkApplyHostedByMapToResult { val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") - val workingPath = parser.get("workingPath") + val preparedInfoPath = parser.get("preparedInfoPath") implicit val formats = DefaultFormats @@ -70,15 +69,15 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : Dataset[Publication] = spark.read.textFile("$graphPath/publication") + val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json("$graphPath/publication") + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) From 1e952cccf644a27180684b62851171ccdfd313d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:15:43 +0200 Subject: [PATCH 133/287] Hosted By Map - refactoring and deletion of not needed methods --- .../SparkPrepareHostedByInfoToApply.scala | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 7395b2450..d342d57b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils @@ -17,8 +17,6 @@ import org.slf4j.{Logger, LoggerFactory} object SparkPrepareHostedByInfoToApply { - - implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.bean(classOf[DatasourceInfo]) implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { @@ -58,33 +56,6 @@ object SparkPrepareHostedByInfoToApply { toEntityItem(c.keys.head, c.values.head) } - def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { - var lst : List[EntityInfo] = List() - if (input.getEissn != null && !input.getEissn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst - } - if (input.getLissn != null && !input.getLissn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getLissn, input.getOfficialname, input.getOpenAccess) :: lst - } - if (input.getIssn != null && !input.getIssn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getIssn, input.getOfficialname, input.getOpenAccess) :: lst - } - - lst - } - - def joinDsandHBM(left:Dataset[DatasourceInfo], right:Dataset[HostedByItemType]): Dataset[EntityInfo] = { - left.joinWith(right, - left.col("id").equalTo(right.col("id")), "left") - .map(t2 => { - val dsi : DatasourceInfo = t2._1 - if(t2._2 != null){ - val hbi : HostedByItemType = t2._2 - dsi.setOpenAccess(hbi.openAccess) - } - dsi - }).flatMap(explodeJournalInfo) - } def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { @@ -123,7 +94,7 @@ object SparkPrepareHostedByInfoToApply { val graphPath = parser.get("graphPath") - val outputPath = parser.get("outputPath") + val outputPath = parser.get("preparedInfoPath") val hostedByMapPath = parser.get("hostedByMapPath") @@ -139,7 +110,7 @@ object SparkPrepareHostedByInfoToApply { val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) //STEP2: creare la mappa publication id issn, eissn, lissn esplosa - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource From 74afe43c3abaca564998f3bf91840276c03d4c85 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 4 Aug 2021 10:16:17 +0200 Subject: [PATCH 134/287] fixed wrong test file --- .../eu/dnetlib/dhp/sx/graph/oaf_to_summary | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary index 4d581044a..e1fd758b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary @@ -1,20 +1,10 @@ -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","target":"50|doi_________::4af011e641e0ba286660fd24a3f603b7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-01-01","dataInfo":null}]} -{"id":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","localIdentifier":[{"identifier":"10.1111/1346-8138.14162","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/1346-8138.14162/fullpdf"}],"typology":"publication","subType":"Article","title":["Guideline of SSc","Diagnostic criteria, severity classification and guidelines of systemic sclerosis"],"author":["Yoshihide Asano","Masatoshi Jinnin","Yasushi Kawaguchi","Masataka Kuwana","Daisuke Goto","Shinichi Sato","Kazuhiko Takehara","Masaru Hatano","Manabu Fujimoto","Naoki Mugii","Hironobu Ihn"],"date":["2018-04-23","2018-01-01"],"subject":null,"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","target":"50|doi_________::7e79063f205480e61ee7fdcf7ab03bad","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2004-11-01","dataInfo":null}]} -{"id":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","localIdentifier":[{"identifier":"10.1002/ajmg.a.30270","schema":"doi","url":"https://onlinelibrary.wiley.com/doi/full/10.1002/ajmg.a.30270"}],"typology":"publication","subType":"Article","title":["Clinical variability in a Noonan syndrome family with a newPTPN11 gene mutation"],"author":["D�bora Romeo Bertola","Alexandre C. Pereira","Paulo S.L. de Oliveira","Chong A. Kim","Jos� Eduardo Krieger"],"date":["2004-09-21T23:19:41Z","2004-11-01"],"subject":[{"scheme":"keywords","value":"Genetics(clinical)"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","target":"50|doi_________::edb21431e0271061e0dddc248300708a","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2010-08-16","dataInfo":null}]} -{"id":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","localIdentifier":[{"identifier":"10.1111/j.1440-1843.2010.01819.x","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/j.1440-1843.2010.01819.x/fullpdf"}],"typology":"publication","subType":"Article","title":["P. aeruginosa: host defence in the lung","Pseudomonas aeruginosa: Host defence in lung diseases"],"author":["Bryan J. WILLIAMS","Joanne DEHNBOSTEL","Timothy S. BLACKWELL"],"date":["2010-08-16"],"subject":[{"scheme":"keywords","value":"Pulmonary and Respiratory Medicine"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f441c6243fd6ae381c520b42349b769","target":"50|doi_________::44ebec98169daae57c106eb1a1072aae","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-07-12","dataInfo":null}]} -{"id":"50|doi_________::3f441c6243fd6ae381c520b42349b769","localIdentifier":[{"identifier":"10.1007/s11901-018-0414-x","schema":"doi","url":"http://link.springer.com/article/10.1007/s11901-018-0414-x/fulltext.html"}],"typology":"publication","subType":"Article","title":["DILI Associated with Skin Reactions"],"author":["Sahand Rahnama-Moghadam","Hans L. Tillmann"],"date":["2018-07-12"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::ff99f7ec03946fa4c8f413d59f75a547","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::f767374d588a8d51de0f129261daa5a7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","target":"50|doi_________::e2d40a313240d3eb979a3172103a4d7f","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2006-11-17","dataInfo":null}]} -{"id":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","localIdentifier":[{"identifier":"10.1007/s11096-006-9043-5","schema":"doi","url":"https://dx.doi.org/10.1007/s11096-006-9043-5"}],"typology":"publication","subType":"Article","title":["Patients’ attitudes towards and experiences of generic drug substitution in Norway"],"author":["Kjoenniksen, Inge","Lindbaek, Morten","Granas, Anne Gerd"],"date":["2006-11-17"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","target":"50|doi_________::fa0760d1427b71b6cb3ffcc739751197","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-09-29","dataInfo":null}]} -{"id":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","localIdentifier":[{"identifier":"10.1080/10408398509527417","schema":"doi","url":"http://www.tandfonline.com/doi/pdf/10.1080/10408398509527417"}],"typology":"publication","subType":"Article","title":["The genusallium. Part 2"],"author":["Gruffydd R. Fenwick","Anthony B. Hanley","John R. Whitaker"],"date":["2009-09-30T13:53:43Z","2009-09-29"],"subject":[{"scheme":"keywords","value":"General Medicine"}],"publisher":["Informa UK Limited"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","target":"50|pmid________::43fb246d61ba89b7f9825d9e02856d17","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-01-01","dataInfo":null}]} -{"id":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","localIdentifier":[{"identifier":"10.1111/j.1742-481x.2009.00634.x","schema":"doi","url":"https://dx.doi.org/10.1111/j.1742-481x.2009.00634.x"}],"typology":"publication","subType":"Article","title":["Venous leg ulcers: patient concordance with compression therapy and its impact on healing and prevention of recurrence"],"author":["Moffatt, Christine","Kommala, Dheerendra","Dourdin, Nathalie","Choe, Yoonhee"],"date":["2009-01-01","2009-11-13T10:40:02Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","target":"50|doi_________::1d47307b88d6bb6757f71bfc56686b74","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2015-01-01","dataInfo":null}]} -{"id":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","localIdentifier":[{"identifier":"10.1111/jocd.12148","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/jocd.12148/fullpdf"}],"typology":"publication","subType":"Article","title":["Assessment of efficacy and tolerability of different concentrations of trichloroacetic acid vs\n. carbon dioxide laser in treatment of xanthelasma palpebrarum"],"author":["Basma Mourad","Lamia H. Elgarhy","Heba-Alla Ellakkawy","Nageh Elmahdy"],"date":["2015-08-07","2015-01-01"],"subject":[{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1602017035423,"id":"50|doi_________::002d84a55111d3d23a3ef388f8f31ad5","originalId":["10.1007/s10956-019-9769-1","9769","50|doiboost____::002d84a55111d3d23a3ef388f8f31ad5"],"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-07-29T19:20:00+0200","dateoftransformation":"1970-01-19T12:20:43+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yeh, Heng-Yi","name":"Heng-Yi","surname":"Yeh","rank":1,"pid":null,"affiliation":[]},{"fullname":"Tsai, Yu-Hsiang","name":"Yu-Hsiang","surname":"Tsai","rank":2,"pid":null,"affiliation":[]},{"fullname":"Tsai, Chin-Chung","name":"Chin-Chung","surname":"Tsai","rank":3,"pid":null,"affiliation":[]},{"fullname":"Chang, Hsin-Yi","name":"Hsin-Yi","surname":"Chang","rank":4,"pid":[{"value":"https://orcid.org/0000-0002-9659-1022","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Education","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Investigating Students’ Conceptions of Technology-Assisted Science Learning: a Drawing Analysis","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-31","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-07-01","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31T13:04:54Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1007/s10956-019-9769-1.pdf","http://link.springer.com/article/10.1007/s10956-019-9769-1/fulltext.html","http://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1596054283851,"id":"50|doi_________::0035704f67a6e839f786b7390c31106e","originalId":["990","10.1186/1471-2377-14-81","50|doiboost____::0035704f67a6e839f786b7390c31106e"],"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-07-29T20:24:43Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Florien W Boele","name":"Florien W","surname":"Boele","rank":1,"pid":null,"affiliation":null},{"fullname":"Irma M Verdonck-de Leeuw","name":"Irma M","surname":"Verdonck-de Leeuw","rank":2,"pid":null,"affiliation":null},{"fullname":"Pim Cuijpers","name":"Pim","surname":"Cuijpers","rank":3,"pid":null,"affiliation":null},{"fullname":"Jaap C Reijneveld","name":"Jaap C","surname":"Reijneveld","rank":4,"pid":null,"affiliation":null},{"fullname":"Jan J Heimans","name":"Jan J","surname":"Heimans","rank":5,"pid":null,"affiliation":null},{"fullname":"Martin Klein","name":"Martin","surname":"Klein","rank":6,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Clinical Neurology","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Medicine","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Internet-based guided self-help for glioma patients with depressive symptoms: design of a randomized controlled trial","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2014-04-10T19:01:28Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-23","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1186/1471-2377-14-81.pdf","http://link.springer.com/article/10.1186/1471-2377-14-81/fulltext.html","http://link.springer.com/content/pdf/10.1186/1471-2377-14-81","http://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"BMC Neurology","issnPrinted":null,"issnOnline":"1471-2377","issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":"14","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619631198141,"id":"50|doi_________::003bc91a6b4c1565813dfdd522697b1a","originalId":["10.1039/c8tc05911j","50|doiboost____::003bc91a6b4c1565813dfdd522697b1a"],"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-04-28T17:33:18Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yuqin Li","name":"Yuqin","surname":"Li","rank":1,"pid":null,"affiliation":null},{"fullname":"Siming Gao","name":"Siming","surname":"Gao","rank":2,"pid":null,"affiliation":null},{"fullname":"Nan Zhang","name":"Nan","surname":"Zhang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xin Huang","name":"Xin","surname":"Huang","rank":4,"pid":null,"affiliation":null},{"fullname":"Jinchang Tian","name":"Jinchang","surname":"Tian","rank":5,"pid":null,"affiliation":null},{"fullname":"Feng Xu","name":"Feng","surname":"Xu","rank":6,"pid":null,"affiliation":null},{"fullname":"Zhizhong Sun","name":"Zhizhong","surname":"Sun","rank":7,"pid":null,"affiliation":null},{"fullname":"Shougen Yin","name":"Shougen","surname":"Yin","rank":8,"pid":null,"affiliation":null},{"fullname":"Xiaoming Wu","name":"Xiaoming","surname":"Wu","rank":9,"pid":null,"affiliation":null},{"fullname":"Wenyi Chu","name":"Wenyi","surname":"Chu","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-4926-4475","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Materials Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Solution-processable, high luminance deep-blue organic light emitting devices based on novel naphthalene bridged bis-triphenylamine derivatives","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-29T03:05:29Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-02-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 .","dataInfo":null},{"value":"A series of naphthalene bridged bis-triphenylamine derivatives with a twisted structure was designed, synthesized and characterized. The dependence of their thermal, photophysical and electrochemical properties and performance as emitters in OLEDs on their chemical structure was systematically studied by the introduction of aryl groups with electron-donating or electron-withdrawing substituents to the naphthalene bridged bis-triphenylamine core. These compounds exhibited steady blue light emissions and high d values ranging from 468 to 500 °C. Most importantly, the deep-blue OLEDs were successfully fabricated using a solution processed method by blending PVK and PBD to improve OLED performance. The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 with CIE coordinates of (0.166, 0.097). These results indicated that these compounds with the twisted naphthalene bridged bis-triphenylamine core could show stable deep-blue electroluminescence properties and introducing the electron-donating group (–OCH 3 ) could enable high luminance and current efficiency for OLEDs.","dataInfo":null}],"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"publisher":{"value":"Royal Society of Chemistry (RSC)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://rsc.li/journals-terms-of-use","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://pubs.rsc.org/en/content/articlepdf/2019/TC/C8TC05911J","http://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Materials Chemistry C","issnPrinted":"2050-7526","issnOnline":"2050-7534","issnLinking":null,"ep":"2698","iss":null,"sp":"2686","vol":"7","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1611897051298,"id":"50|doi_________::0048eb8bb3f289cccc9358bf3726a772","originalId":["10.1016/s1441-3582(03)70132-3","10.1016/S1441-3582(03)70132-3","50|doiboost____::0048eb8bb3f289cccc9358bf3726a772"],"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-03-12T11:12:27+0100","dateoftransformation":"1970-01-19T09:00:07+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Ling, Bith-Hong","name":"Bith-Hong","surname":"Ling","rank":1,"pid":null,"affiliation":[]},{"fullname":"Lockshin, Larry","name":"Larry","surname":"Lockshin","rank":2,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Components of Wine Prices for Australian Wine: How Winery Reputation, Wine Quality, Region, Vintage, and Winery Size Contribute to the Price of Varietal Wines","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2003-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2018-12-09","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-07-07T08:52:15Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-01-27","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":" Australian wines are identified by the varietal names of the grapes rather than the regions as in France and other traditional wine producing countries. This paper uses the concept of hedonic price theory to investigate a range of extrinsic characteristics’ ability to predict prices for different climate regions (warm and cool) and four major wine varieties of Australian wines, two reds (shiraz and cabernet) and two whites (chardonnay and riesling). The effects of winery reputation (wine company/brand), winery size (production scale), age of the wine, and region of origin (wine grape source) contributing to the relationship between price and quality attributes of Australian wines are investigated, based on 1880 observations of bottled wines. Wine quality rating and winery/brand reputation have major effects on the price, while region and size of winery have differential effects depending on the variety of grape. Vintage has only a minor effect. ","dataInfo":null}],"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://journals.sagepub.com/page/policies/text-and-data-mining-license","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/plain","http://journals.sagepub.com/doi/pdf/10.1016/S1441-3582%2803%2970132-3","http://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-07-07T08:52:15Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1613672628083,"id":"50|doi_________::005ca383dbcbecb839c1c4f525636048","originalId":["10.1080/01431161.2017.1302106","50|doiboost____::005ca383dbcbecb839c1c4f525636048"],"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-02-18T18:23:48Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Jun Wang","name":"Jun","surname":"Wang","rank":1,"pid":null,"affiliation":null},{"fullname":"Jinye Peng","name":"Jinye","surname":"Peng","rank":2,"pid":null,"affiliation":null},{"fullname":"Xiaoyue Jiang","name":"Xiaoyue","surname":"Jiang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xiaoyi Feng","name":"Xiaoyi","surname":"Feng","rank":4,"pid":null,"affiliation":null},{"fullname":"Jianhong Zhou","name":"Jianhong","surname":"Zhou","rank":5,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Earth and Planetary Sciences","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Remote-sensing image fusion using sparse representation with sub-dictionaries","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-03-24T08:21:49Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-18","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"publisher":{"value":"Informa UK Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.tandfonline.com/doi/pdf/10.1080/01431161.2017.1302106","http://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"International Journal of Remote Sensing","issnPrinted":"0143-1161","issnOnline":"1366-5901","issnLinking":null,"ep":"3585","iss":null,"sp":"3564","vol":"38","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619364899333,"id":"50|doi_________::006a8e00482f03066c79b472dfe51ba3","originalId":["10.1515/bot-2019-0045","50|doiboost____::006a8e00482f03066c79b472dfe51ba3"],"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-02-08T18:30:38+0100","dateoftransformation":"1970-01-19T08:13:03+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"den Hartog, Cornelis","name":"Cornelis","surname":"den Hartog","rank":1,"pid":null,"affiliation":[]},{"fullname":"Triest, Ludwig","name":"Ludwig","surname":"Triest","rank":2,"pid":[{"value":"https://orcid.org/0000-0002-4946-9614","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Plant Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Aquatic Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ecology, Evolution, Behavior and Systematics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"A profound view and discourse on the typification and status of three confused taxa: Ruppia maritima, R. spiralis and R. cirrhosa","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2020-01-15","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15T09:02:47Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-06-25","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractTaxonomic difficulties have persisted within the genus Ruppia for a long time. We first unravel misconceptions as perceived on different continents and subsequently present a revised interpretation of the identity and typification of three European taxa at species level: Ruppia maritima L., Ruppia spiralis L. ex Dumortier, and Ruppia cirrhosa (Petagna) Grande. To do this, historical specimens, illustrations and original descriptions were studied. We supersede a previous choice of the figure of Buccaferrea maritima, foliis minus acutis Micheli (1729) as the lectotype of R. maritima and type species of the genus Ruppia owing to a serious conflict with the protologue. Based on a meticulous interpretation of protologues and figures in a historical context, we reject the recent view of assigning R. cirrhosa and its proposed lectotype (iconotype) as a homotypic synonym of R. maritima. We agree with an earlier lectotypification of R. spiralis, though for another reason than the above-mentioned abused homotypy. Consequently, R. cirrhosa is a synonym of neither R. maritima or R. spiralis, based on material from Petagna in the Herbarium of Naples designated as the holotype of R. cirrhosa. We argue for three species to be considered as fully independent taxa: R. maritima, R. spiralis and R. cirrhosa.","dataInfo":null}],"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"publisher":{"value":"Walter de Gruyter GmbH","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.degruyter.com/view/journals/botm/63/3/article-p229.xml","https://www.degruyter.com/document/doi/10.1515/bot-2019-0045/pdf","http://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-06-25","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1588058636007,"id":"50|doi_________::00a55b7fae8de31a2ffa4e235e98a7bf","originalId":["10.1109/tns.2004.835620","50|doiboost____::00a55b7fae8de31a2ffa4e235e98a7bf"],"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-04-30T20:50:59+0200","dateoftransformation":"1970-01-19T10:11:12+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Veloso, J.F.C.A.","name":"J.F.C.A.","surname":"Veloso","rank":1,"pid":null,"affiliation":[]},{"fullname":"Amaro, F.","name":"F.","surname":"Amaro","rank":2,"pid":null,"affiliation":[]},{"fullname":"dos Santos, J.M.F.","name":"J.M.F.","surname":"dos Santos","rank":3,"pid":null,"affiliation":[]},{"fullname":"Mir, J.A.","name":"J.A.","surname":"Mir","rank":4,"pid":null,"affiliation":[]},{"fullname":"Derbyshire, G.E.","name":"G.E.","surname":"Derbyshire","rank":5,"pid":null,"affiliation":[]},{"fullname":"Stephenson, R.","name":"R.","surname":"Stephenson","rank":6,"pid":null,"affiliation":[]},{"fullname":"Rhodes, N.J.","name":"N.J.","surname":"Rhodes","rank":7,"pid":null,"affiliation":[]},{"fullname":"Schooneveld, E.M.","name":"E.M.","surname":"Schooneveld","rank":8,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Nuclear and High Energy Physics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Electrical and Electronic Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Nuclear Energy and Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Application of the microhole and strip plate detector for neutron detection","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2004-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-14","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2004-10-19T08:20:44Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"publisher":{"value":"Institute of Electrical and Electronics Engineers (IEEE)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/23/29603/01344292.pdf?arnumber=1344292","http://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-10-19T08:20:44Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1592299453363,"id":"50|doi_________::00ad40afc28e0c860116ac595183a0c0","originalId":["S0377221797004529","10.1016/s0377-2217(97)00452-9","50|doiboost____::00ad40afc28e0c860116ac595183a0c0"],"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-06-16T09:24:13Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Chung-Yee Lee","name":"Chung-Yee","surname":"Lee","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Two-machine flowshop scheduling with availability constraints","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2002-07-25T17:48:22Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"1999-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-04-24","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"https://www.elsevier.com/tdm/userlicense/1.0/","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/plain","http://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"1999-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"European Journal of Operational Research","issnPrinted":"0377-2217","issnOnline":null,"issnLinking":null,"ep":"429","iss":null,"sp":"420","vol":"114","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1609910911587,"id":"50|doi_________::00d8413c6d6f5c091ab6dda88f0a0ecb","originalId":["10.1175/jpo-d-16-0281.1","50|doiboost____::00d8413c6d6f5c091ab6dda88f0a0ecb"],"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-01-06T05:28:31Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"P. B. Smit","name":"P. B.","surname":"Smit","rank":1,"pid":null,"affiliation":null},{"fullname":"T. T. Janssen","name":"T. T.","surname":"Janssen","rank":2,"pid":null,"affiliation":null},{"fullname":"T. H. C. Herbers","name":"T. H. C.","surname":"Herbers","rank":3,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Oceanography","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Nonlinear Wave Kinematics near the Ocean Surface","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-05-09T19:33:59Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-11-16","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractEstimation of second-order, near-surface wave kinematics is important for interpretation of ocean surface remote sensing and surface-following instruments, determining loading on offshore structures, and understanding of upper-ocean transport processes. Unfortunately, conventional wave theories based on Stokes-type expansions do not consider fluid motions at levels above the unperturbed fluid level. The usual practice of extrapolating the fluid kinematics from the unperturbed free surface to higher points in the fluid is generally reasonable for narrowband waves, but for broadband ocean waves this results in dramatic (and nonphysical) overestimation of surface velocities. Consequently, practical approximations for random waves are at best empirical and are often only loosely constrained by physical principles. In the present work, the authors formulate the governing equations for water waves in an incompressible and inviscid fluid, using a boundary-fitted coordinate system (i.e., sigma or s coordinates) to derive expressions for near-surface kinematics in nonlinear random waves from first principles. Comparison to a numerical model valid for highly nonlinear waves shows that the new results 1) are consistent with second-order Stokes theory, 2) are similar to extrapolation methods in narrowband waves, and 3) greatly improve estimates of surface kinematics in random seas.","dataInfo":null}],"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"publisher":{"value":"American Meteorological Society","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://journals.ametsoc.org/view/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://journals.ametsoc.org/jpo/article-pdf/47/7/1657/4810788/jpo-d-16-0281_1.pdf","https://journals.ametsoc.org/downloadpdf/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Physical Oceanography","issnPrinted":"0022-3670","issnOnline":"1520-0485","issnLinking":null,"ep":"1673","iss":null,"sp":"1657","vol":"47","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1582142606255,"id":"50|doi_________::00df77889b1ee0af015524c627f0cf47","originalId":["10.1109/isscc.2010.5433999","50|doiboost____::00df77889b1ee0af015524c627f0cf47"],"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-02-19T20:03:26Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Sameh A Ibrahim","name":"Sameh A","surname":"Ibrahim","rank":1,"pid":null,"affiliation":null},{"fullname":"Behzad Razavi","name":"Behzad","surname":"Razavi","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"A 20Gb/s 40mW equalizer in 90nm CMOS technology","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2010-03-24T14:35:14Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-19","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"publisher":{"value":"IEEE","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0004","classname":"0004","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/5428240/5433812/05433999.pdf?arnumber=5433999","http://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"2010 IEEE International Solid-State Circuits Conference - (ISSCC)","issnPrinted":null,"issnOnline":null,"issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":null,"edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file From eccf3851b0352b557a4cce3dbd90d0fcf8ee3408 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:16:30 +0200 Subject: [PATCH 135/287] Hosted By Map - refactoring --- .../dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index ca5e82a4a..de41ebf28 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -23,10 +23,6 @@ object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - - - - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 @@ -217,7 +213,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) } From 67ba4c40e05bc90da931f7a621e11038849453db Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:17:28 +0200 Subject: [PATCH 136/287] Hosted By Map - added parameter resources --- .../hostedbymap/hostedby_apply_params.json | 36 ++++++++++++++++++ .../hostedbymap/hostedby_prepare_params.json | 37 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json new file mode 100644 index 000000000..e34398290 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json @@ -0,0 +1,36 @@ + +[ + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json new file mode 100644 index 000000000..9809d71b5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"hbmp", + "paramLongName":"hostedByMapPath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] From e94ae0b1de11f31dad6367605538371d9fe13d6b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:18:11 +0200 Subject: [PATCH 137/287] Hosted By Map - extention of the workflow to consider also the application of the map to publications and datasources --- .../graph/hostedbymap/oozie_app/workflow.xml | 106 ++++++++++++++++-- 1 file changed, 99 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index ecf6c3b31..30e500da3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -9,6 +9,10 @@ outputPath the output path + + hostedByMapPath + the output path + sparkDriverMemory memory for driver process @@ -65,23 +69,30 @@ - + + + + + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - - + + - @@ -134,13 +145,94 @@ --datasourcePath${sourcePath}/datasource --workingPath${workingDir} - --outputPath${outputPath} + --outputPath${hostedByMapPath} --masteryarn-cluster - + + + + yarn-cluster + Prepare info to apply the hbm + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hostedByMapPath${hostedByMapPath} + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + + + + + + yarn-cluster + Apply hbm to result + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/publication + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + yarn-cluster + Apply hbm to datasource + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToDatasource + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/datasource + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + From eb8c3f85944c8b915d455270d9a3ec2e26047244 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:19:17 +0200 Subject: [PATCH 138/287] Hosted By Map - test modified because of the application of the new aggregator on datasources --- .../java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index b78eb978b..c48c3b35d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -98,7 +98,7 @@ class TestApply extends java.io.Serializable{ val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo]))) assertEquals(10, dats_ds.count()) @@ -122,7 +122,7 @@ class TestApply extends java.io.Serializable{ }else{ assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) - assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) } }) From c7b71647c63cab956cb183775752df7c2b9d9b02 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:20:02 +0200 Subject: [PATCH 139/287] Hosted By Map - modification of the resource for testing the presence of only one entry per datasource id --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json index 6e1cce3b6..64e2433ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -1,2 +1,3 @@ {"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} {"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} From 83c04e5d28472fe5a199f88b50b2b81e3b15c2e9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 4 Aug 2021 10:37:57 +0200 Subject: [PATCH 140/287] mapping test for dataset records adapted to reflect the delegated pid authority (zenodo) --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 60970f4d6..c431b4dd8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -24,6 +24,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -250,7 +251,24 @@ public class MappersTest { final Relation r1 = (Relation) list.get(1); final Relation r2 = (Relation) list.get(2); + assertEquals(d.getId(), r1.getSource()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget()); + assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType()); + assertEquals(ModelConstants.OUTCOME, r1.getSubRelType()); + assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass()); + assertTrue(r1.getValidated()); + assertEquals("2020-01-01", r1.getValidationDate()); + + assertEquals(d.getId(), r2.getTarget()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource()); + assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType()); + assertEquals(ModelConstants.OUTCOME, r2.getSubRelType()); + assertEquals(ModelConstants.PRODUCES, r2.getRelClass()); + assertTrue(r2.getValidated()); + assertEquals("2020-01-01", r2.getValidationDate()); + assertValidId(d.getId()); + assertEquals("50|doi_________::000374d100a9db469bd42b69dbb40b36", d.getId()); assertEquals(2, d.getOriginalId().size()); assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -304,10 +322,12 @@ public class MappersTest { }); assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid()); assertNotNull(d.getInstance().get(0).getPid()); - assertTrue(d.getInstance().get(0).getPid().isEmpty()); + assertFalse(d.getInstance().get(0).getPid().isEmpty()); - assertEquals("doi", d.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); - assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); + assertEquals("doi", d.getInstance().get(0).getPid().get(0).getQualifier().getClassid()); + assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getPid().get(0).getValue()); + + assertTrue(d.getInstance().get(0).getAlternateIdentifier().isEmpty()); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); @@ -738,12 +758,11 @@ public class MappersTest { } private void assertValidId(final String id) { - System.out.println(id); + // System.out.println(id); assertEquals(49, id.length()); - assertEquals('|', id.charAt(2)); - assertEquals(':', id.charAt(15)); - assertEquals(':', id.charAt(16)); + assertEquals(IdentifierFactory.ID_PREFIX_SEPARATOR, id.substring(2, 3)); + assertEquals(IdentifierFactory.ID_SEPARATOR, id.substring(15, 17)); } private List vocs() throws IOException { From 1965e4eece11830cbf373b5345030c1198f72fb8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 18:29:03 +0200 Subject: [PATCH 141/287] new workflow for downloading the dump of crossref and unpack it --- .../oozie_app/config-default.xml | 42 ++++++ .../downloadandunpack/oozie_app/download.sh | 2 + .../downloadandunpack/oozie_app/mock.sh | 2 + .../downloadandunpack/oozie_app/workflow.xml | 121 ++++++++++++++++++ .../doiboost/preprocess/oozie_app/download.sh | 2 +- 5 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml new file mode 100644 index 000000000..508202e30 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh new file mode 100644 index 000000000..1bb7aff1f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh new file mode 100644 index 000000000..30386d613 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml new file mode 100644 index 000000000..91de3bfb3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml @@ -0,0 +1,121 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefdumpfilename} + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh index dfb0db708..30386d613 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -1,2 +1,2 @@ -#!bin/bash +#!/bin/bash curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file From 5faeefbda8160685a16d7628d68fa534f03146c7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 10:54:03 +0200 Subject: [PATCH 142/287] added script to download the dump,changed the workflow input paramenters --- .../crossref/ExtractCrossrefRecords.java | 8 +- .../oozie_app/download.sh | 2 + .../oozie_app/workflow.xml | 119 +++++++++--------- 3 files changed, 68 insertions(+), 61 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7cae1fcb..c7d3770a0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -29,16 +29,16 @@ public class ExtractCrossrefRecords { "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); - final String workingPath = parser.get("workingPath"); + final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new Path(hdfsServerUri.concat(crossrefFileNameTarGz)); + Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); + conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); + FileSystem fs = FileSystem.get(URI.create(workingPath), conf); FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); try (TarArchiveInputStream tais = new TarArchiveInputStream( new GzipCompressorInputStream(crossrefFileStream))) { diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh new file mode 100644 index 000000000..662f5313c --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 506d86a08..2d3ad6bc3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -1,13 +1,5 @@ - + - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base path - sparkDriverMemory memory for driver process @@ -18,27 +10,82 @@ sparkExecutorCores - 2 number of cores used by single executor + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + - + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + ${jobTracker} ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ + --outputPath${crossrefDumpPath}/files/ @@ -48,7 +95,7 @@ yarn-cluster cluster - SparkGenerateCrossrefDataset + SparkUnpackCrossrefEntries eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries dhp-doiboost-${projectVersion}.jar @@ -63,56 +110,14 @@ --masteryarn-cluster --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates + --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - \ No newline at end of file From bd096f51706b60f740430c007a9d3c67ec66d519 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 10:55:43 +0200 Subject: [PATCH 143/287] removed not needed param file --- .../generate_dataset_params.json | 21 ------------------- 1 file changed, 21 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json deleted file mode 100644 index 63e080337..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", - "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file From 73dc082927a1bd2561bac10073c075a4575df690 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 15:20:50 +0200 Subject: [PATCH 144/287] added new dumped field (openaccessroute, pid and alternate identifier at the level of the instance) and the bipFinder measure at the level of the result --- .../dhp/oa/graph/dump/ResultMapper.java | 52 ++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index fe9b4e443..9f665c06d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -5,6 +5,16 @@ import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; +import eu.dnetlib.dhp.schema.dump.oaf.Author; +import eu.dnetlib.dhp.schema.dump.oaf.Country; +import eu.dnetlib.dhp.schema.dump.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.dump.oaf.Instance; +import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; +import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; +import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; +import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.*; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -13,10 +23,6 @@ import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Context; import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class ResultMapper implements Serializable { @@ -130,6 +136,13 @@ public class ResultMapper implements Serializable { break; } + Optional> mes = Optional.ofNullable(input.getMeasures()); + if(mes.isPresent()){ + List measure = new ArrayList<>(); + mes.get().forEach(m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(u.getKey(), u.getValue())))); + out.setMeasures(measure); + } + Optional .ofNullable(input.getAuthor()) .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); @@ -395,11 +408,12 @@ public class ResultMapper implements Serializable { } private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { - Optional opAr = Optional - .ofNullable(i.getAccessright()); + Optional opAr = Optional.ofNullable(i.getAccessright()); + if (opAr.isPresent()) { if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); + instance .setAccessright( AccessRight @@ -407,9 +421,35 @@ public class ResultMapper implements Serializable { code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); + if (opAr.get().getOpenAccessRoute() != null){ + switch (opAr.get().getOpenAccessRoute()){ + case hybrid: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid); + break; + case gold: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold); + break; + case green: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green); + break; + case bronze: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze); + break; + + } + } } } + Optional.ofNullable(i.getPid()) + .ifPresent(pid -> instance.setPid( + pid.stream().map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); + + Optional.ofNullable(i.getAlternateIdentifier()) + .ifPresent(ai -> instance.setAlternateIdentifier(ai.stream().map(p -> ControlledField. + newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList()))); + Optional .ofNullable(i.getLicense()) .ifPresent(value -> instance.setLicense(value.getValue())); From c3931557e3985c70a99159b414d138576aaf278b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 6 Aug 2021 18:56:18 +0200 Subject: [PATCH 145/287] extended the logic of the dump to consider the validation date in the relation (also in the dumped result for communities and funders at the level of the project), the extention on the instance for the APC, the pid, the alternate identifiers, and the extention of the AccessRight to store the OpenAccessRoute. Added new resourec for testing and extended the old class to verify the new dump. Fixed also issue on relation dump: only relation whose source and target are entities in the graph are dumped. The same hold for references to projects --- .../dhp/oa/graph/dump/DumpProducts.java | 4 +- .../oa/graph/dump/QueryInformationSystem.java | 18 +- .../dhp/oa/graph/dump/ResultMapper.java | 133 ++++++---- .../dhp/oa/graph/dump/SaveCommunityMap.java | 23 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 4 +- .../graph/dump/community/CommunitySplit.java | 2 +- .../community/SparkPrepareResultProject.java | 14 +- .../dump/complete/DumpGraphEntities.java | 26 +- .../dhp/oa/graph/dump/complete/Extractor.java | 2 +- .../dump/complete/QueryInformationSystem.java | 7 +- .../dump/complete/SparkDumpRelationJob.java | 11 +- .../funderresults/SparkDumpFunderResults.java | 65 +++-- .../SparkResultLinkedToProject.java | 43 ++-- .../graph/dump/funder_result_parameters.json | 4 +- .../graph/dump/input_parameters_link_prj.json | 4 +- .../dhp/oa/graph/dump/DumpJobTest.java | 182 +++++++++++++- .../dump/PrepareResultProjectJobTest.java | 109 ++++++++ .../dump/QueryInformationSystemTest.java | 2 +- .../oa/graph/dump/SplitForCommunityTest.java | 8 +- .../dump/complete/CreateRelationTest.java | 238 +++++++++--------- .../graph/dump/complete/DumpRelationTest.java | 76 +++++- .../complete/QueryInformationSystemTest.java | 5 +- .../ResultLinkedToProjectTest.java | 10 +- .../dump/funderresult/SplitPerFunderTest.java | 8 +- .../dump/funderresource/extendeddump/project | 12 + .../funderresource/extendeddump/publication | 3 +- .../dump/funderresource/extendeddump/relation | 9 - .../graph/dump/funderresource/match/project | 12 + .../match/{relations.json => relation} | 4 +- .../graph/dump/funderresource/nomatch/project | 12 + .../nomatch/{relations.json => relation} | 0 .../oa/graph/dump/relation/relation_validated | 30 +++ .../resultDump/publication_extendedinstance | 1 + .../resultProject/match_validatedRels/project | 23 ++ .../match_validatedRels/relation | 8 + pom.xml | 2 +- 36 files changed, 820 insertions(+), 294 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/project delete mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/project rename dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/{relations.json => relation} (95%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/project rename dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/{relations.json => relation} (100%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/project create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/relation diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index 4ddcea9e8..d09e584a3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -37,8 +37,8 @@ public class DumpProducts implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - execDump( - spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType); + execDump( + spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType); }); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d1f295fcf..7b661ac66 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -29,15 +29,15 @@ public class QueryInformationSystem { ""; private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//CONFIGURATION/context[./@id=%s] " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; + + + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + + " and $x//CONFIGURATION/context[./@id=%s] " + + + " return " + + " " + + "{$x//CONFIGURATION/context/@id}" + + "{$x//CONFIGURATION/context/@label}" + + ""; public CommunityMap getCommunityMap(boolean singleCommunity, String community_id) throws ISLookUpException, DocumentException { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 9f665c06d..e38117338 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -5,6 +5,10 @@ import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; import eu.dnetlib.dhp.schema.dump.oaf.Author; import eu.dnetlib.dhp.schema.dump.oaf.Country; @@ -14,15 +18,13 @@ import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; import eu.dnetlib.dhp.schema.dump.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.*; -import org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Context; import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; +import eu.dnetlib.dhp.schema.oaf.*; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; public class ResultMapper implements Serializable { @@ -137,9 +139,12 @@ public class ResultMapper implements Serializable { } Optional> mes = Optional.ofNullable(input.getMeasures()); - if(mes.isPresent()){ + if (mes.isPresent()) { List measure = new ArrayList<>(); - mes.get().forEach(m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(u.getKey(), u.getValue())))); + mes + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); out.setMeasures(measure); } @@ -271,18 +276,17 @@ public class ResultMapper implements Serializable { } - List pids = new ArrayList<>(); + Optional .ofNullable(input.getPid()) .ifPresent( - value -> value + value -> out.setPid(value .stream() - .forEach( - p -> pids - .add( + .map( + p -> ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())))); - out.setPid(pids); + .newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList()))); + oStr = Optional.ofNullable(input.getDateofacceptance()); if (oStr.isPresent()) { out.setPublicationdate(oStr.get().getValue()); @@ -292,10 +296,11 @@ public class ResultMapper implements Serializable { out.setPublisher(oStr.get().getValue()); } - List sourceList = new ArrayList<>(); + Optional .ofNullable(input.getSource()) - .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); + .ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList()) )); + // value.stream().forEach(s -> sourceList.add(s.getValue()))); // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); List subjectList = new ArrayList<>(); Optional @@ -421,8 +426,8 @@ public class ResultMapper implements Serializable { code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); - if (opAr.get().getOpenAccessRoute() != null){ - switch (opAr.get().getOpenAccessRoute()){ + if (opAr.get().getOpenAccessRoute() != null) { + switch (opAr.get().getOpenAccessRoute()) { case hybrid: instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid); break; @@ -441,14 +446,25 @@ public class ResultMapper implements Serializable { } } - Optional.ofNullable(i.getPid()) - .ifPresent(pid -> instance.setPid( - pid.stream().map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); + Optional + .ofNullable(i.getPid()) + .ifPresent( + pid -> instance + .setPid( + pid + .stream() + .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); - Optional.ofNullable(i.getAlternateIdentifier()) - .ifPresent(ai -> instance.setAlternateIdentifier(ai.stream().map(p -> ControlledField. - newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList()))); + Optional + .ofNullable(i.getAlternateIdentifier()) + .ifPresent( + ai -> instance + .setAlternateIdentifier( + ai + .stream() + .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); Optional .ofNullable(i.getLicense()) @@ -554,34 +570,55 @@ public class ResultMapper implements Serializable { return a; } - private static Pid getOrcid(List p) { - for (StructuredProperty pid : p) { - if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { - Optional di = Optional.ofNullable(pid.getDataInfo()); - if (di.isPresent()) { - return Pid - .newInstance( + private static Pid getAuthorPid(StructuredProperty pid) { + Optional di = Optional.ofNullable(pid.getDataInfo()); + if (di.isPresent()) { + return Pid + .newInstance( ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()), + .newInstance( + pid.getQualifier().getClassid(), + pid.getValue()), Provenance - .newInstance( - di.get().getProvenanceaction().getClassname(), - di.get().getTrust())); - } else { - return Pid - .newInstance( + .newInstance( + di.get().getProvenanceaction().getClassname(), + di.get().getTrust())); + } else { + return Pid + .newInstance( ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()) + .newInstance( + pid.getQualifier().getClassid(), + pid.getValue()) - ); - } - - } + ); } + } + + private static Pid getOrcid(List p) { + List pid_list = p.stream().map(pid -> { + if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || + (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))){ + return pid; + } + return null; + }).filter(pid -> pid != null).collect(Collectors.toList()); + + if(pid_list.size() == 1){ + return getAuthorPid(pid_list.get(0)); + } + + List orcid = pid_list.stream().filter(ap -> ap.getQualifier().getClassid() + .equals(ModelConstants.ORCID)).collect(Collectors.toList()); + if(orcid.size() == 1){ + return getAuthorPid(orcid.get(0)); + } + orcid = pid_list.stream().filter(ap -> ap.getQualifier().getClassid() + .equals(ModelConstants.ORCID_PENDING)).collect(Collectors.toList()); + if(orcid.size() == 1){ + return getAuthorPid(orcid.get(0)); + } + return null; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 7db19f771..71bea6047 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.graph.dump; import java.io.BufferedWriter; @@ -54,10 +55,10 @@ public class SaveCommunityMap implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils - .toString( - SaveCommunityMap.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); + .toString( + SaveCommunityMap.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -71,8 +72,10 @@ public class SaveCommunityMap implements Serializable { final String isLookUpUrl = parser.get("isLookUpUrl"); log.info("isLookUpUrl: {}", isLookUpUrl); - final Boolean singleCommunity = Optional.ofNullable(parser.get("singleDeposition")) - .map(Boolean::valueOf).orElse(false); + final Boolean singleCommunity = Optional + .ofNullable(parser.get("singleDeposition")) + .map(Boolean::valueOf) + .orElse(false); final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); @@ -82,8 +85,12 @@ public class SaveCommunityMap implements Serializable { } - private void saveCommunityMap(boolean singleCommunity, String community_id) throws ISLookUpException, IOException, DocumentException { - writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); + private void saveCommunityMap(boolean singleCommunity, String community_id) + throws ISLookUpException, IOException, DocumentException { + writer + .write( + Utils.OBJECT_MAPPER + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); writer.close(); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 28881b253..e0b7cf5d7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -48,14 +48,14 @@ public class SendToZenodoHDFS implements Serializable { .orElse(false); final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null); - //final String communityMapPath = parser.get("communityMapPath"); + // final String communityMapPath = parser.get("communityMapPath"); Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); - //CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath); + // CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath); RemoteIterator fileStatusListIterator = fileSystem .listFiles( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index d64c26503..6abca2255 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -49,7 +49,7 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities - .keySet() + .keySet() .stream() .forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_"))); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 2d43888b4..6b4baafde 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -8,6 +8,8 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.community.Validated; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -76,7 +78,7 @@ public class SparkPrepareResultProject implements Serializable { private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) { Dataset relation = Utils .readPath(spark, inputPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and lower(relClass) = 'isproducedby'"); + .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); Dataset projects = Utils .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); @@ -96,7 +98,7 @@ public class SparkPrepareResultProject implements Serializable { rp.setResultId(s); eu.dnetlib.dhp.schema.oaf.Project p = first._1(); projectSet.add(p.getId()); - Project ps = getProject(p); + Project ps = getProject(p, first._2); List projList = new ArrayList<>(); projList.add(ps); @@ -105,7 +107,7 @@ public class SparkPrepareResultProject implements Serializable { eu.dnetlib.dhp.schema.oaf.Project op = c._1(); if (!projectSet.contains(op.getId())) { projList - .add(getProject(op)); + .add(getProject(op, c._2)); projectSet.add(op.getId()); @@ -120,7 +122,7 @@ public class SparkPrepareResultProject implements Serializable { .json(outputPath); } - private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op) { + private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) { Project p = Project .newInstance( op.getId(), @@ -155,7 +157,9 @@ public class SparkPrepareResultProject implements Serializable { provenance.setTrust(di.get().getTrust()); p.setProvenance(provenance); } - + if (relation.getValidated()){ + p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate())); + } return p; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index b843ec365..7441baaef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; +import static com.jayway.jsonpath.Filter.filter; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; @@ -8,8 +9,12 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -448,19 +453,18 @@ public class DumpGraphEntities implements Serializable { private static void organizationMap(SparkSession spark, String inputPath, String outputPath, Class inputClazz) { - Utils - .readPath(spark, inputPath, inputClazz) - .map( - (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), - Encoders.bean(Organization.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + Utils.readPath(spark, inputPath, inputClazz) + .map( + (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), + Encoders.bean(Organization.class)) + .filter((FilterFunction) o -> o!= null) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); } - private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) { + private static eu.dnetlib.dhp.schema.dump.oaf.graph.Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) { if (org.getDataInfo().getDeletedbyinference()) return null; Organization organization = new Organization(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java index f76f92013..7e8ddfaf8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java @@ -129,7 +129,7 @@ public class Extractor implements Serializable { return relationList.iterator(); }, Encoders.bean(Relation.class)) - .write() + .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) .json(outputPath); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index 3736b7380..ecb588198 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -5,6 +5,8 @@ import java.io.StringReader; import java.util.*; import java.util.function.Consumer; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; @@ -12,9 +14,9 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; +import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.dhp.utils.DHPUtils; public class QueryInformationSystem { @@ -136,6 +138,9 @@ public class QueryInformationSystem { } private String makeOpenaireId(Node el, String prefix) { + if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))){ + return null; + } String funder = null; String grantId = null; String funding = null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index 11c3600dd..7370b43f8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -107,13 +107,10 @@ public class SparkDumpRelationJob implements Serializable { } } } -// Optional -// .ofNullable(relation.getDataInfo()) -// .ifPresent( -// datainfo -> rel_new -// .setProvenance( -// Provenance -// .newInstance(datainfo.getProvenanceaction().getClassname(), datainfo.getTrust()))); + if(relation.getValidated()){ + rel_new.setValidated(relation.getValidated()); + rel_new.setValidationDate(relation.getValidationDate()); + } return rel_new; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 9fcdcaf78..1c7052553 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -44,9 +44,9 @@ public class SparkDumpFunderResults implements Serializable { parser.parseArgument(args); Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String inputPath = parser.get("sourcePath"); @@ -55,40 +55,38 @@ public class SparkDumpFunderResults implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String relationPath = parser.get("relationPath"); - log.info("relationPath: {}", relationPath); + final String graphPath = parser.get("graphPath"); + log.info("relationPath: {}", graphPath); SparkConf conf = new SparkConf(); runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - writeResultProjectList(spark, inputPath, outputPath, relationPath); - }); + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + writeResultProjectList(spark, inputPath, outputPath, graphPath); + }); } private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, - String relationPath) { + String graphPath) { - Dataset relation = Utils - .readPath(spark, relationPath + "/relation", Relation.class) - .filter( - "dataInfo.deletedbyinference = false and lower(relClass) = '" - + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); + Dataset project = Utils + .readPath(spark, graphPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); Dataset result = Utils - .readPath(spark, inputPath + "/publication", CommunityResult.class) - .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); + .readPath(spark, inputPath + "/publication", CommunityResult.class) + .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); - List funderList = relation - .select("target") - .map((MapFunction) value -> value.getString(0).substring(0, 15), Encoders.STRING()) - .distinct() - .collectAsList(); + + List funderList = project + .select("id") + .map((MapFunction) value -> value.getString(0).substring(0, 15), Encoders.STRING()) + .distinct() + .collectAsList(); funderList.forEach(funder -> { String fundernsp = funder.substring(3); @@ -109,7 +107,7 @@ public class SparkDumpFunderResults implements Serializable { } private static void dumpResults(String nsp, Dataset results, String outputPath, - String funderName) { + String funderName) { results.map((MapFunction) r -> { if (!Optional.ofNullable(r.getProjects()).isPresent()) { @@ -117,7 +115,6 @@ public class SparkDumpFunderResults implements Serializable { } for (Project p : r.getProjects()) { if (p.getId().startsWith(nsp)) { - if (nsp.startsWith("40|irb")) { if (p.getFunder().getShortName().equals(funderName)) return r; @@ -129,15 +126,15 @@ public class SparkDumpFunderResults implements Serializable { } return null; }, Encoders.bean(CommunityResult.class)) - .filter((FilterFunction) r -> r != null) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/" + funderName); + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + funderName); } private static void writeFunderResult(String funder, Dataset results, String outputPath, - String funderDump) { + String funderDump) { if (funder.startsWith("40|irb")) { dumpResults(funder, results, outputPath, "HRZZ"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index e0355d6d6..55578fdab 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -10,6 +10,7 @@ import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -21,14 +22,14 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; -import org.apache.spark.sql.*; -import eu.dnetlib.dhp.schema.oaf.Project; + /** * Selects the results linked to projects. Only for these results the dump will be performed. - * The code to perform the dump and to expend the dumped results with the informaiton related to projects + * The code to perform the dump and to expend the dumped results with the information related to projects * is the one used for the dump of the community products */ public class SparkResultLinkedToProject implements Serializable { @@ -78,6 +79,7 @@ public class SparkResultLinkedToProject implements Serializable { private static void writeResultsLinkedToProjects(SparkSession spark, Class inputClazz, String inputPath, String outputPath, String graphPath) { + Dataset results = Utils .readPath(spark, inputPath, inputClazz) .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); @@ -86,25 +88,32 @@ public class SparkResultLinkedToProject implements Serializable { .filter( "dataInfo.deletedbyinference = false and lower(relClass) = '" + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); + Dataset project = Utils.readPath(spark, graphPath + "/project", Project.class); - spark - .sql( - "Select res.* " + - "from relation rel " + - "join result res " + - "on rel.source = res.id " + - "join project p " + - "on rel.target = p.id " + - "") - .as(Encoders.bean(inputClazz)) - .groupByKey( - (MapFunction< R, String>) value -> value - .getId(), + results.createOrReplaceTempView("result"); + relations.createOrReplaceTempView("relation"); + project.createOrReplaceTempView("project"); + + Dataset tmp = spark + .sql( + "Select res.* " + + "from relation rel " + + "join result res " + + "on rel.source = res.id " + + "join project p " + + "on rel.target = p.id " + + "") + .as(Encoders.bean(inputClazz)); + tmp + .groupByKey( + (MapFunction) value -> value + .getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(inputClazz)) + .mapGroups((MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json index 9a5a553b1..5dc8e8910 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json @@ -18,8 +18,8 @@ "paramRequired": false }, { - "paramName": "rp", - "paramLongName": "relationPath", + "paramName": "gp", + "paramLongName": "graphPath", "paramDescription": "the relationPath", "paramRequired": true } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json index fdd9492fe..f5641bc45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json @@ -24,8 +24,8 @@ "paramRequired": true }, { - "paramName":"rp", - "paramLongName":"relationPath", + "paramName":"gp", + "paramLongName":"graphPath", "paramDescription": "the path to the relations", "paramRequired": true } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 6b64a4124..39454192e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -7,7 +7,13 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.List; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.sun.xml.internal.ws.policy.AssertionSet; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.Instance; +import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import org.apache.commons.io.FileUtils; +import org.apache.neethi.Assertion; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -199,7 +205,7 @@ public class DumpJobTest { Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); -//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) + } @@ -302,6 +308,178 @@ public class DumpJobTest { //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) + } + + @Test + public void testPublicationExtendedInstance(){ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run( + // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + verificationDataset.show(false); + + GraphResult gr = verificationDataset.first(); + + + Assertions.assertEquals(2, gr.getMeasures().size()); + Assertions.assertTrue(gr.getMeasures().stream().anyMatch(m -> m.getKey().equals("influence") + && m.getValue().equals("1.62759106106e-08"))); + Assertions.assertTrue(gr.getMeasures().stream().anyMatch(m -> m.getKey().equals("popularity") + && m.getValue().equals("0.22519296"))); + + Assertions.assertEquals(6, gr.getAuthor().size()); + Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Nikolaidou,Charitini") && + a.getName().equals("Charitini") && a.getSurname().equals("Nikolaidou") + && a.getRank() == 1 && a.getPid() == null)); + + Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Votsi,Nefta") && + a.getName().equals("Nefta") && a.getSurname().equals("Votsi") + && a.getRank() == 2 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + + Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Sgardelis,Steanos") && + a.getName().equals("Steanos") && a.getSurname().equals("Sgardelis") + && a.getRank() == 3 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + + Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Halley,John") && + a.getName().equals("John") && a.getSurname().equals("Halley") + && a.getRank() == 4 && a.getPid() == null)); + + Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Pantis,John") && + a.getName().equals("John") && a.getSurname().equals("Pantis") + && a.getRank() == 5 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + + Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Tsiafouli,Maria") && + a.getName().equals("Maria") && a.getSurname().equals("Tsiafouli") + && a.getRank() == 6 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + + Assertions.assertEquals("publication", gr.getType()); + + Assertions.assertEquals("eng", gr.getLanguage().getCode()); + Assertions.assertEquals("English", gr.getLanguage().getLabel()); + + Assertions.assertEquals(1, gr.getCountry().size()); + Assertions.assertEquals("IT" , gr.getCountry().get(0).getCode()); + Assertions.assertEquals("Italy" , gr.getCountry().get(0).getLabel()); + Assertions.assertTrue( gr.getCountry().get(0).getProvenance() == null); + + Assertions.assertEquals(12, gr.getSubjects().size()); + Assertions.assertTrue(gr.getSubjects().stream().anyMatch(s -> s.getSubject().getValue().equals("Ecosystem Services hotspots") + && s.getSubject().getScheme().equals("ACM") && s.getProvenance() != null && + s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); + Assertions.assertTrue(gr.getSubjects().stream().anyMatch(s -> s.getSubject().getValue().equals("Natura 2000") + && s.getSubject().getScheme().equals("") && s.getProvenance() != null && + s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); + + Assertions.assertEquals("Ecosystem Service capacity is higher in areas of multiple designation types", + gr.getMaintitle()); + + Assertions.assertEquals(null, gr.getSubtitle()); + + Assertions.assertEquals(1, gr.getDescription().size()); + + Assertions.assertTrue(gr.getDescription().get(0).startsWith("The implementation of the Ecosystem Service (ES) concept into practice")); + Assertions.assertTrue(gr.getDescription().get(0).endsWith("start complying with new standards and demands for nature conservation and environmental management.")); + + Assertions.assertEquals("2017-01-01", gr.getPublicationdate()); + + Assertions.assertEquals("Pensoft Publishers", gr.getPublisher()); + + Assertions.assertEquals(null, gr.getEmbargoenddate()); + + Assertions.assertEquals(1, gr.getSource().size()); + Assertions.assertEquals("One Ecosystem 2: e13718", gr.getSource().get(0)); + + Assertions.assertEquals(1, gr.getFormat().size()); + Assertions.assertEquals("text/html", gr.getFormat().get(0)); + + Assertions.assertEquals(0, gr.getContributor().size()); + + Assertions.assertEquals(0, gr.getCoverage().size()); + + Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel()); + Assertions.assertEquals(Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); + Assertions.assertEquals(null, gr.getBestaccessright().getOpenAccessRoute()); + + Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); + Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); + Assertions.assertEquals("", gr.getContainer().getIssnPrinted()); + Assertions.assertEquals("", gr.getContainer().getIssnLinking()); + + Assertions.assertTrue(null == gr.getDocumentationUrl() || gr.getDocumentationUrl().size() == 0); + + Assertions.assertTrue(null == gr.getCodeRepositoryUrl()); + + Assertions.assertEquals(null, gr.getProgrammingLanguage()); + + Assertions.assertTrue(null == gr.getContactperson() || gr.getContactperson().size() == 0); + + Assertions.assertTrue(null == gr.getContactgroup() || gr.getContactgroup().size() == 0); + + Assertions.assertTrue(null == gr.getTool() || gr.getTool().size() == 0); + + Assertions.assertEquals(null, gr.getSize()); + + Assertions.assertEquals(null, gr.getVersion()); + + Assertions.assertTrue(null == gr.getGeolocation() || gr.getGeolocation().size() == 0); + + Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId()); + + Assertions.assertEquals(2, gr.getOriginalId().size()); + Assertions.assertTrue(gr.getOriginalId().contains("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2") + && gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); + + Assertions.assertEquals(1, gr.getPid().size()); + Assertions.assertTrue(gr.getPid().get(0).getScheme().equals("doi") + && gr.getPid().get(0).getValue().equals("10.1016/j.triboint.2014.05.004")); + + Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateofcollection()); + + Assertions.assertEquals(1, gr.getInstance().size()); + + Instance instance = gr.getInstance().get(0); + Assertions.assertEquals(0, instance.getPid().size()); + Assertions.assertEquals(1, instance.getAlternateIdentifier().size()); + Assertions.assertTrue(instance.getAlternateIdentifier().get(0).getScheme().equals("doi") + && instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718")); + Assertions.assertEquals(null, instance.getLicense()); + Assertions.assertTrue(instance.getAccessright().getCode().equals(Constants.accessRightsCoarMap + .get(ModelConstants.ACCESS_RIGHT_OPEN))); + Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN)); + Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green)); + Assertions.assertEquals(2, instance.getUrl().size()); + Assertions.assertTrue(instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718") + && instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/")); + Assertions.assertEquals("2017-01-01",instance.getPublicationdate()); + Assertions.assertEquals(null,instance.getArticleprocessingcharge()); + Assertions.assertEquals("peerReviewed", instance.getRefereed()); + + } @Test @@ -455,6 +633,8 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); + + // verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset // .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") // .count() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index d5a9ba8dd..2fbd37db7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -7,9 +7,11 @@ import java.nio.file.Path; import java.util.HashMap; import org.apache.commons.io.FileUtils; +import org.apache.neethi.Assertion; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; @@ -236,4 +238,111 @@ public class PrepareResultProjectJobTest { } + @Test + public void testMatchValidated() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels") + .getPath(); + + SparkPrepareResultProject.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/preparedInfo", + "-sourcePath", sourcePath + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedInfo") + .map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); + + Assertions.assertTrue(verificationDataset.count() == 2); + + Assertions + .assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + Assertions + .assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + + verificationDataset.createOrReplaceTempView("dataset"); + + String query = "select resultId, MyT.id project , MyT.title title, MyT.acronym acronym , MyT.provenance.provenance provenance, " + + "MyT.validated.validatedByFunder, MyT.validated.validationDate " + + "from dataset " + + "lateral view explode(projectsList) p as MyT "; + + org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); + Assertions.assertEquals(3, resultExplodedProvenance.count()); + Assertions.assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count()); + Assertions + .assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); + + Assertions + .assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + + "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + + "and validatedByFunder = true " + + "and validationDate = '2021-08-06'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + + "and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " + + "and validatedByFunder = true and validationDate = '2021-08-04'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " + + "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + + "and validatedByFunder = true and validationDate = '2021-08-05'") + .count()); + + Assertions + .assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index 499df3a09..729e78844 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -24,7 +24,7 @@ public class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') " + " and ($x//context/param[./@name = 'status']/text() = 'all') " + " return " + " " + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 42ad5634a..946e13655 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -79,7 +79,7 @@ public class SplitForCommunityTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/split/dh-ch") + .textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage") .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark @@ -92,7 +92,7 @@ public class SplitForCommunityTest { 1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); tmp = sc - .textFile(workingDir.toString() + "/split/egi") + .textFile(workingDir.toString() + "/split/EGI_Federation") .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); verificationDataset = spark @@ -105,7 +105,7 @@ public class SplitForCommunityTest { 1, verificationDataset.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); tmp = sc - .textFile(workingDir.toString() + "/split/ni") + .textFile(workingDir.toString() + "/split/Neuroinformatics") .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); verificationDataset = spark @@ -118,7 +118,7 @@ public class SplitForCommunityTest { 1, verificationDataset.filter("id = '50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795'").count()); tmp = sc - .textFile(workingDir.toString() + "/split/science-innovation-policy") + .textFile(workingDir.toString() + "/split/Science_and_Innovation_Policy_Studies") .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); verificationDataset = spark diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index 448034d0c..76d406422 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -1,8 +1,6 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; - -import eu.dnetlib.dhp.schema.oaf.Project; import java.util.*; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -16,6 +14,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; public class CreateRelationTest { @@ -205,7 +204,7 @@ public class CreateRelationTest { " \n" + " oac_ni\n" + " 2018-03-01T12:00:00\n" + - " \n" + + " \n" + " \n" + " \n" + " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" + @@ -441,57 +440,65 @@ public class CreateRelationTest { " oaa_elixir-gr\n" + " 2018-03-01T12:00:00\n" + " \n" + - " \n" + - " BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)\n" + - " \n" + - " 1U24RR025736-01\n" + - " NIH\n" + - " \n" + - " \n" + - " COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning\n" + - " \n" + - " 0223843\n" + - " NSF\n" + - " \n" + - " \n" + - " The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia\n" + - " \n" + - " 5R01MH082795-05\n" + - " NIH\n" + - " \n" + - " \n" + - " Fragmented early life environmental and emotional / cognitive vulnerabilities\n" + - " \n" + - " 1P50MH096889-01A1\n" + - " NIH\n" + - " \n" + - " \n" + - " Enhancement of the 1000 Functional Connectome Project\n" + - " \n" + - " 1R03MH096321-01A1\n" + - " TUBITAK\n" + - " \n" + - " \n" + - " CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project\n" + - " \n" + - " 1131441\n" + - " NSF\n" + - " \n" + - " \n" + - " Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI\n" + - " \n" + - " 0121950\n" + - " NSF\n" + - " \n" + - " \n" + - " Transforming statistical methodology for neuroimaging meta-analysis.\n" + - " \n" + - " 100309\n" + - " WT\n" + - " \n" + - " " + + " \n" + + " BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)\n" + + + " \n" + + " 1U24RR025736-01\n" + + " NIH\n" + + " \n" + + " \n" + + " COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning\n" + + + " \n" + + " 0223843\n" + + " NSF\n" + + " \n" + + " \n" + + " The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia\n" + + + " \n" + + " 5R01MH082795-05\n" + + " NIH\n" + + " \n" + + " \n" + + " Fragmented early life environmental and emotional / cognitive vulnerabilities\n" + + + " \n" + + " 1P50MH096889-01A1\n" + + " NIH\n" + + " \n" + + " \n" + + " Enhancement of the 1000 Functional Connectome Project\n" + + + " \n" + + " 1R03MH096321-01A1\n" + + " TUBITAK\n" + + " \n" + + " \n" + + " CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project\n" + + + " \n" + + " 1131441\n" + + " NSF\n" + + " \n" + + " \n" + + " Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI\n" + + + " \n" + + " 0121950\n" + + " NSF\n" + + " \n" + + " \n" + + " Transforming statistical methodology for neuroimaging meta-analysis.\n" + + + " \n" + + " 100309\n" + + " WT\n" + + " \n" + + " " + - " \n" + " \n" + " \n" + " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" + @@ -626,88 +633,87 @@ public class CreateRelationTest { final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem - .getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class)); + .getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class)); cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); - List rList = new ArrayList<>(); cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); - Assertions.assertEquals(44 , rList.size()); + Assertions.assertEquals(44, rList.size()); Assertions - .assertFalse( - rList - .stream() - .map(r -> r.getSource().getId()) - .collect(Collectors.toSet()) - .contains( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))); + .assertFalse( + rList + .stream() + .map(r -> r.getSource().getId()) + .collect(Collectors.toSet()) + .contains( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("dh-ch")))); Assertions - .assertEquals( - 2, - rList - .stream() - .filter( - r -> r - .getSource() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .collect(Collectors.toList()) - .size()); + .assertEquals( + 2, + rList + .stream() + .filter( + r -> r + .getSource() + .getId() + .equals( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("clarin")))) + .collect(Collectors.toList()) + .size()); Assertions - .assertEquals( - 2, - rList - .stream() - .filter( - r -> r - .getTarget() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .collect(Collectors.toList()) - .size()); + .assertEquals( + 2, + rList + .stream() + .filter( + r -> r + .getTarget() + .getId() + .equals( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("clarin")))) + .collect(Collectors.toList()) + .size()); Set tmp = rList - .stream() - .filter( - r -> r - .getSource() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .map(r -> r.getTarget().getId()) - .collect(Collectors.toSet()); + .stream() + .filter( + r -> r + .getSource() + .getId() + .equals( + String + .format( + "%s|%s::%s", Constants.CONTEXT_ID, + Constants.CONTEXT_NS_PREFIX, + DHPUtils.md5("clarin")))) + .map(r -> r.getTarget().getId()) + .collect(Collectors.toSet()); Assertions - .assertTrue( - tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") && - tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4") ); + .assertTrue( + tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") && + tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4")); rList.forEach(rel -> { - if (rel.getSource().getId().startsWith("40|")){ + if (rel.getSource().getId().startsWith("40|")) { String proj = rel.getSource().getId().substring(3); Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index d80eb3ec6..a310448a4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -10,6 +10,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; @@ -71,7 +72,7 @@ public class DumpRelationTest { public void test1() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") .getPath(); SparkDumpRelationJob.main(new String[] { @@ -93,6 +94,8 @@ public class DumpRelationTest { verificationDataset.createOrReplaceTempView("table"); + verificationDataset.foreach((ForeachFunction)r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + Dataset check = spark .sql( "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " @@ -127,4 +130,75 @@ public class DumpRelationTest { .count()); } + @Test + public void test2() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated") + .getPath(); + + SparkDumpRelationJob.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath + }); + +// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + + verificationDataset.createOrReplaceTempView("table"); + + verificationDataset.foreach((ForeachFunction)r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset check = spark + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); + + Assertions.assertEquals(20, check.filter("name = 'isProvidedBy'").count()); + Assertions + .assertEquals( + 20, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count()); + Assertions + .assertEquals( + 7, check + .filter( + "name = 'isParticipant' and stype = 'organization' and ttype = 'project' " + + "and provenance = 'Harvested'") + .count()); + + Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); + Assertions + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); + + Assertions.assertEquals(2, check.filter("name = 'isProducedBy'").count()); + Assertions + .assertEquals( + 2, check + .filter( + "name = 'isProducedBy' and stype = 'project' and ttype = 'result' " + + "and provenance = 'Harvested' and validated = true " + + "and validationDate = '2021-08-06'") + .count()); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index d769aa138..fa1528e90 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -6,6 +6,7 @@ import static org.mockito.Mockito.lenient; import java.util.*; import java.util.function.Consumer; +import eu.dnetlib.dhp.schema.common.ModelSupport; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -528,7 +529,7 @@ public class QueryInformationSystemTest { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.execContextRelationQuery(); - queryInformationSystem.getContextRelation(consumer, "contentproviders", "10|"); + queryInformationSystem.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); Assertions.assertEquals(5, cInfoList.size()); } @@ -539,7 +540,7 @@ public class QueryInformationSystemTest { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.execContextRelationQuery(); - queryInformationSystem.getContextRelation(consumer, "contentproviders", "10"); + queryInformationSystem.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); cInfoList.forEach(contextInfo -> { switch (contextInfo.getId()) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 6c5ebbab3..0b062abd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -75,8 +75,8 @@ public class ResultLinkedToProjectTest { .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") .getPath(); - final String relationPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json") + final String graphPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch") .getPath(); SparkResultLinkedToProject.main(new String[] { @@ -84,7 +84,7 @@ public class ResultLinkedToProjectTest { "-outputPath", workingDir.toString() + "/preparedInfo", "-sourcePath", sourcePath, "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-relationPath", relationPath + "-graphPath", graphPath }); @@ -109,7 +109,7 @@ public class ResultLinkedToProjectTest { .getPath(); final String relationPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json") + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match") .getPath(); SparkResultLinkedToProject.main(new String[] { @@ -117,7 +117,7 @@ public class ResultLinkedToProjectTest { "-outputPath", workingDir.toString() + "/preparedInfo", "-sourcePath", sourcePath, "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-relationPath", relationPath + "-graphPath", relationPath }); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 26187f0a3..9dd2aa41e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -75,7 +75,7 @@ public class SplitPerFunderTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-outputPath", workingDir.toString() + "/split", "-sourcePath", sourcePath, - "-relationPath", sourcePath + "-graphPath", sourcePath }); @@ -143,5 +143,11 @@ public class SplitPerFunderTest { .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); Assertions.assertEquals(1, tmp.count()); + // CONICYT 0 + tmp = sc + .textFile(workingDir.toString() + "/split/CONICYTF") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(0, tmp.count()); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/project b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/project new file mode 100644 index 000000000..f68cead77 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/project @@ -0,0 +1,12 @@ +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Fotoniikka ja modernit kuvantamismenetelmät LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"135027"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"190,000 €"},"originalId":["aka_________::135027"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Academy Project Funding TT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"316061"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|nwo_________::dc69ada721bf21ed51055b6421850d73","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"450,000 €"},"originalId":["aka_________::316061"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population."},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MOISE"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-17-CE05-0033"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|nih_________::031bb5f2f70239b3210eda38b2079f67","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-17-CE05-0033"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS"},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"GALAXY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-09-SEGI-0005"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|nsf_________::03748bcb5d754c951efec9700e18a56d","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-09-SEGI-0005"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES"},"totalcost":0.0} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0347462"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|snsf________::50cb15ff7a6a3f8531f063770179e346","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0347462"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Femtosecond laser micromachining facility"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0347462"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LP140100567"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2017-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Projects\n Linkage Projects\n Linkage Projects\n \n arc:fundingStream\n \n "}],"id":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LP140100567"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Linkage Projects - Grant ID: LP140100567"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LP140100567"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DP180101235"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-04-02","dateoftransformation":"2019-09-02","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2023-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Discovery Projects\n Discovery Projects\n Discovery Projects\n \n arc:fundingStream\n \n "}],"id":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::DP180101235"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2018-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Discovery Projects - Grant ID: DP180101235"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/DP180101235"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0989831"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|corda__h2020::846b777af165fef7c904a81712a83b66","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0989831"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0989831"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"3120023"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"IBACACHE ROJAS, JUANA"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-28"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::POSTDOCTORADOFondecyt stream, POSTDOCTORADOFondecyt stream, POSTDOCTORADOconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|irb_hr______::1e5e62235d094afd01cd56e65112fc63","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::3120023"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2011-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/183109"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1040240"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ROMERO FIGUEROA, JULIO"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2007-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1040240"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2004-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/163340"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1020683"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MANRIQUEZ CASTRO, VICTOR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2006-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1020683"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2002-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/162452"}} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANIM"},"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"FP7-PEOPLE-2013-IIF"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"628405"},"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"M.D.Davies@bristol.ac.uk"},"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Davies, Maria"},"contactphone":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"+44 117 3317352"},"contracttype":{"classid":"MC","classname":"Support for training and career development of researchers (Marie Curie)","schemeid":"ec:FP7contractTypes","schemename":"ec:FP7contractTypes"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-26","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2016-04-30"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ec__________::ECECEuropean CommissionEUec__________::EC::FP7::SP3::PEOPLEMarie-Curie ActionsPEOPLEec:programec__________::EC::FP7::SP3SP3-PeopleSP3ec:specificprogramec__________::EC::FP7SEVENTH FRAMEWORK PROGRAMMEFP7ec:frameworkprogram"}],"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MC-IIF"},"optional2":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"https://ec.europa.eu/research/participants/portal/page/call_FP7#wlp_call_FP7"},"originalId":["corda_______::628405"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-05-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers"},"totalcost":0.0} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication index 901b34abc..a58ac7520 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication @@ -9,4 +9,5 @@ {"author":[{"fullname":"K.P. Moustris","name":"K. P.","rank":1,"surname":"Moustris"},{"fullname":"D. Zafirakis","name":"D.","rank":2,"surname":"Zafirakis"},{"fullname":"K.A. Kavvadias","name":"K. A.","rank":3,"surname":"Kavvadias"},{"fullname":"J.K. Kaldellis","name":"J. K.","rank":4,"surname":"Kaldellis"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"container":{"name":"Mediterranean Conference on Power Generation, Transmission, Distribution and Energy Conversion (MedPower 2016)"},"context":[{"code":"sdsn-gr","label":"SDSN - Greece","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"2020-02-01T05:10:47Z","description":["One of the main parameters affecting the reliability of the renewable energy sources (RES) system, compared to the local conventional power station, is the ability to forecast the RES availability for a few hours ahead. To this end, the main objective of this work is the prognosis of the mean, maximum and minimum hourly wind power (WP) 8hours ahead. For this purpose, Artificial Neural Networks (ANN) modeling is applied. For the appropriate training of the developed ANN models hourly meteorological data are used. These data have been recorded by a meteorological mast in Tilos Island, Greece.\n\nFor the evaluation of the developed ANN forecasting models proper statistical evaluation indices are used. According to the results, the coefficient of the determination ranges from 0.285 up to 0.768 (mean hourly WP), from 0.227 up to 0.798 (maximum hourly WP) and from 0.025 up to 0.398 (minimum hourly WP). Furthermore, the proposed forecasting methodology shows that is able to give sufficient and adequate prognosis of WP by a wind turbine in a specific location 8 hours ahead. This will be a useful tool for the operator of a RES system in order to achieve a better monitoring and a better management of the whole system."],"format":[],"id":"50|dedup_wf_001::0a6525f420fd1f922125c3a1a280c8df","instance":[{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2017-02-23T20:11:53Z","refereed":"UNKNOWN","type":"Conference object","url":["http://digital-library.theiet.org/content/conferences/10.1049/cp.2016.1094?crawler=true","http://dx.doi.org/10.1049/cp.2016.1094"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"cc-by","refereed":"UNKNOWN","type":"Conference object","url":["https://zenodo.org/record/887339/files/Wind%20power%20forecasting%20using%20historical%20data%20and%20artificial%20neural%20networks%20modeling.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"refereed":"UNKNOWN","type":"Conference object","url":["http://digital-library.theiet.org/content/conferences/10.1049/cp.2016.1094","https://core.ac.uk/display/144851191","http://jglobal.jst.go.jp/public/20090422/201702260673071390","https://academic.microsoft.com/#/detail/2590295725"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2016-11-07","refereed":"UNKNOWN","type":"Conference object","url":["https://zenodo.org/record/887339"]}],"language":{"code":"und","label":"Undetermined"},"lastupdatetimestamp":1603727328518,"maintitle":"Wind power forecasting using historical data and artificial neural networks modeling","originalId":["10.1049/cp.2016.1094","2590295725","oai:zenodo.org:887339"],"pid":[{"scheme":"doi","value":"10.1049/cp.2016.1094"}],"projects":[{"acronym":"TILOS","code":"646529","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda__h2020::64c901e621f90b9a38badb1d78902205","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Technology Innovation for the Local Scale, Optimum Integration of Battery Energy Storage"}],"publicationdate":"2016-11-07","publisher":"Institution of Engineering and Technology","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Wind Power, Forecasting, Artificial Neural Networks"}}],"type":"publication"} {"author":[{"fullname":"Juan M. Traverso","name":"Juan M.","rank":1,"surname":"Traverso"},{"fullname":"Julien Bobe","name":"Julien","rank":2,"surname":"Bobe"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"context":[{"code":"fam","label":"Fisheries and Aquaculture Management","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]},{"code":"mes","label":"European Marine Science","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":["Station commune de Recherches en Ichtyophysiologie, Biodiversité et Environnement (SCRIBE) ; Institut National de la Recherche Agronomique (INRA)","Institut Fédératif de Recherche - Génétique Fonctionnelle Agronomie et Santé (IFR 140 GFAS)","European Project: 222719,EC:FP7:KBBE,FP7-KBBE-2007-2A,LIFECYCLE(2009)","Station commune de Recherches en Ichtyophysiologie, Biodiversité et Environnement (SCRIBE) ; Institut National de la Recherche Agronomique (INRA)-IFR140"],"country":[{"code":"FR","label":"France","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2020-10-17T09:56:33.515Z","description":[],"format":[],"id":"50|dedup_wf_001::0b9078b1d22faeab00e52a2796dbbad0","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"hostedby":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"publicationdate":"2009-07-18","refereed":"UNKNOWN","type":"Conference object","url":["https://hal.inrae.fr/hal-02754267"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"hostedby":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"publicationdate":"2009-07-18","refereed":"UNKNOWN","type":"Conference object","url":["https://hal.inrae.fr/hal-02754267"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"publicationdate":"2009-07-18","refereed":"UNKNOWN","type":"Conference object","url":["https://hal.inrae.fr/hal-02754267"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::e7b4be4856b6d6d7c3484bf403b44634","value":"Biology of Reproduction"},"publicationdate":"2009-07-01","refereed":"UNKNOWN","type":"Article","url":["http://academic.oup.com/biolreprod/article-pdf/81/Suppl_1/285/","http://dx.doi.org/10.1093/biolreprod/81.s1.285"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::e7b4be4856b6d6d7c3484bf403b44634","value":"Biology of Reproduction"},"refereed":"UNKNOWN","type":"Article","url":["https://academic.oup.com/biolreprod/article/2955191/The","https://academic.microsoft.com/#/detail/2594950426"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"The maternal contribution of oocyte-specific genes to embryo development in zebrafish: analysis by real-time PCR and knock-down using morpholino oligos","originalId":["oai:HAL:hal-02754267v1","10.1093/biolreprod/81.s1.285","2594950426"],"pid":[{"scheme":"doi","value":"10.1093/biolreprod/81.s1.285"}],"projects":[{"acronym":"LIFECYCLE","code":"222719","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::f3b07b1c9d0afbdb983a9b5820f4e34b","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Building a biological knowledge-base on fish lifecycles for competitive, sustainable European aquaculture"}],"publicationdate":"2009-07-18","publisher":"HAL CCSD","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"[SDV.BDLR]Life Sciences [q-bio]/Reproductive Biology"}}],"type":"publication"} {"author":[{"fullname":"Pasquale Nino","name":"Pasquale","pid":{"id":{"scheme":"orcid","value":"0000-0002-7070-4586"}},"rank":1,"surname":"Nino"},{"fullname":"Guido D'Urso","name":"Guido","pid":{"id":{"scheme":"orcid","value":"0000-0002-0251-4668"}},"rank":2,"surname":"D'Urso"},{"fullname":"Salvatore Falanga Bolognesi","name":"Salvatore","pid":{"id":{"scheme":"orcid","value":"0000-0003-0134-2574"}},"rank":3,"surname":"Falanga Bolognesi"},{"fullname":"Giuseppe Pulighe","name":"Giuseppe","pid":{"id":{"scheme":"orcid","value":"0000-0002-6470-0984"}},"rank":4,"surname":"Pulighe"},{"fullname":"Carlo De Michele","name":"Carlo","pid":{"id":{"scheme":"orcid","value":"0000-0002-3797-850X"}},"rank":5,"surname":"De Michele"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9","value":"Sygma"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311","value":"Multidisciplinary Digital Publishing Institute"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},{"key":"10|opendoar____::d8c24ca8f23c562a5600876ca2a550ce","value":"Archivio della ricerca - Università degli studi di Napoli Federico II"}],"container":{"edition":"","ep":"14730","iss":"11","issnLinking":"","issnOnline":"2072-4292","issnPrinted":"","name":"Remote Sensing","sp":"14708","vol":"7"},"context":[{"code":"sdsn-gr","label":"SDSN - Greece","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":[],"country":[{"code":"IT","label":"Italy","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"","description":["The sustainable management of water resources plays a key role in Mediterranean viticulture, characterized by scarcity and competition of available water. This study focuses on estimating the evapotranspiration and crop coefficients of table grapes vineyards trained on overhead “tendone” systems in the Apulia region (Italy). Maximum vineyard transpiration was estimated by adopting the “direct” methodology for ETp proposed by the Food and Agriculture Organization in Irrigation and Drainage Paper No. 56, with crop parameters estimated from Landsat 8 and RapidEye satellite data in combination with ground-based meteorological data. The modeling results of two growing seasons (2013 and 2014) indicated that canopy growth, seasonal and 10-day sums evapotranspiration values were strictly related to thermal requirements and rainfall events. The estimated values of mean seasonal daily evapotranspiration ranged between 4.2 and 4.1 mm·d−1, while midseason estimated values of crop coefficients ranged from 0.88 to 0.93 in 2013, and 1.02 to 1.04 in 2014, respectively. The experimental evapotranspiration values calculated represent the maximum value in absence of stress, so the resulting crop coefficients should be used with some caution. It is concluded that the retrieval of crop parameters and evapotranspiration derived from remotely-sensed data could be helpful for downscaling to the field the local weather conditions and agronomic practices and thus may be the basis for supporting grape growers and irrigation managers."],"format":["application/pdf"],"id":"50|dedup_wf_001::0bc332de35b4eb0cc3d0c54b3ff22dd3","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9","value":"Sygma"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"https://creativecommons.org/licenses/by/4.0/","refereed":"UNKNOWN","type":"Article","url":["http://dx.doi.org/10.3390/rs71114708"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"license":"https://creativecommons.org/licenses/by/4.0/","publicationdate":"2015-11-05","refereed":"UNKNOWN","type":"Article","url":["http://www.mdpi.com/2072-4292/7/11/14708/pdf","http://dx.doi.org/10.3390/rs71114708"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"license":"cc-by","refereed":"UNKNOWN","type":"Article","url":["https://www.mdpi.com/2072-4292/7/11/14708/pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"refereed":"UNKNOWN","type":"Article","url":["https://dblp.uni-trier.de/db/journals/remotesensing/remotesensing7.html#VaninoPNMBD15","https://doi.org/10.3390/rs71114708","https://core.ac.uk/display/55147144","http://doi.org/10.3390/rs71114708","https://academic.microsoft.com/#/detail/2149946514"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311","value":"Multidisciplinary Digital Publishing Institute"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"license":"http://creativecommons.org/licenses/by/3.0/","publicationdate":"2015-11-05","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.3390/rs71114708"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"publicationdate":"2015-11-01","refereed":"UNKNOWN","type":"Article","url":["http://www.mdpi.com/2072-4292/7/11/14708","https://doaj.org/toc/2072-4292"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::d8c24ca8f23c562a5600876ca2a550ce","value":"Archivio della ricerca - Università degli studi di Napoli Federico II"},"hostedby":{"key":"10|opendoar____::d8c24ca8f23c562a5600876ca2a550ce","value":"Archivio della ricerca - Università degli studi di Napoli Federico II"},"publicationdate":"2015-01-01","refereed":"UNKNOWN","type":"Article","url":["http://hdl.handle.net/11588/637935"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Estimation of Evapotranspiration and Crop Coefficients of Tendone Vineyards Using Multi-Sensor Remote Sensing Data in a Mediterranean Environment","originalId":["","rs71114708","10.3390/rs71114708","2149946514","oai:mdpi.com:/2072-4292/7/11/14708/","oai:doaj.org/article:939730c0b55b4a8bbaa150970587d25a","oai:www.iris.unina.it:11588/637935"],"pid":[{"scheme":"doi","value":"10.3390/rs71114708"}],"projects":[{"acronym":"FATIMA","code":"633945","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda__h2020::b0e820e327215ca5b86884d536a17fef","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"FArming Tools for external nutrient Inputs and water MAnagement"}],"publicationdate":"2015-11-05","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"evapotranspiration"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"crop coefficient"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"leaf area index"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Landsat 8"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"RapidEye"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"remote sensing"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"vineyards"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"table grapes"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Science"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Q"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Crop coefficient; Evapotranspiration; Landsat 8; Leaf area index; RapidEye; Remote sensing; Table grapes; Vineyards; Earth and Planetary Sciences (all)"}}],"type":"publication"} -{"author":[{"fullname":"Rousselet, L.","name":"L.","pid":{"id":{"scheme":"orcid","value":"0000-0001-5016-3658"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":1,"surname":"Rousselet"},{"fullname":"Doglioli, A. M.","name":"A. M.","pid":{"id":{"scheme":"orcid","value":"0000-0003-1309-9954"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":2,"surname":"Doglioli"},{"fullname":"Verneil, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0002-8344-7953"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":3,"surname":"Verneil"},{"fullname":"Pietri, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0003-1111-9640"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":4,"surname":"Pietri"},{"fullname":"Della Penna, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0002-7579-3610"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":5,"surname":"Della Penna"},{"fullname":"Berline, L.","name":"L.","pid":{"id":{"scheme":"orcid","value":"0000-0002-5831-7399"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":6,"surname":"Berline"},{"fullname":"Marrec, P.","name":"P.","pid":{"id":{"scheme":"orcid","value":"0000-0002-7811-4150"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":7,"surname":"Marrec"},{"fullname":"Grégori, G.","name":"G.","rank":8,"surname":"Grégori"},{"fullname":"Thyssen, M.","name":"M.","rank":9,"surname":"Thyssen"},{"fullname":"Carlotti, F.","name":"F.","rank":10,"surname":"Carlotti"},{"fullname":"Barrillon, S.","name":"S.","rank":11,"surname":"Barrillon"},{"fullname":"Simon‐Bot, F.","name":"F.","rank":12,"surname":"Simon‐bot"},{"fullname":"Bonal, M.","name":"M.","pid":{"id":{"scheme":"orcid","value":"0000-0002-2077-3049"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":13,"surname":"Bonal"},{"fullname":"d'Ovidio, F.","name":"F.","rank":14,"surname":"D Ovidio"},{"fullname":"Petrenko, A.","name":"A.","rank":15,"surname":"Petrenko"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"}],"context":[{"code":"mes","label":"European Marine Science","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":["Institut méditerranéen d'océanologie (MIO) ; Centre National de la Recherche Scientifique (CNRS)-Université de Toulon (UTLN)-Aix Marseille Université (AMU)-Institut de Recherche pour le Développement (IRD)","New York University [Abu Dhabi] ; NYU System (NYU)","Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Muséum National d'Histoire Naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)-Institut national des sciences de l'Univers (INSU - CNRS)-Sorbonne Université (SU)-Centre National de la Recherche Scientifique (CNRS)","University of Washington [Seattle]","Université Libre de Bruxelles [Bruxelles] (ULB)","ANR-09-CEXC-0006,FOCEA,Excellence pour une synergie observation-modèle et l'évaluation de l'état écologique de la Méditerranée Nord Occidentale(2009)","ANR: 11-IDEX-0001,Amidex,INITIATIVE D'EXCELLENCE AIX MARSEILLE UNIVERSITE(2011)","ANR: 11-LABX-0061,OTMed,Objectif Terre : Bassin Méditerranéen(2011)","European Project: 624170,EC:FP7:PEOPLE,FP7-PEOPLE-2013-IEF,SEAQUEST(2015)","Institut méditerranéen d'océanologie (MIO) ; Institut de Recherche pour le Développement (IRD)-Aix Marseille Université (AMU)-Institut national des sciences de l'Univers (INSU - CNRS)-Université de Toulon (UTLN)-Centre National de la Recherche Scientifique (CNRS)","Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)","Université libre de Bruxelles (ULB)","Processus et interactions de fine échelle océanique (PROTEO) ; Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)-Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)","ANR-11-IDEX-0001-02/11-IDEX-0001,AMIDEX,AMIDEX(2011)"],"country":[{"code":"FR","label":"France","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2019-10-11T09:44:42.348Z","description":["International audience; Vertical velocities can be estimated indirectly from in situ observations by theoretical frameworks like the-equation. Direct measures of vertical exchanges are challenging due to their typically ephemeral spatiotemporal scales. In this study we address this problem with an adaptive sampling strategy coupling various biophysical instruments. We analyze the 3-D organization of a cyclonic mesoscale structure finely sampled during the Observing Submesoscale Coupling At High Resolution cruise in the Ligurian Sea during fall 2015. The observations, acquired with a moving vessel profiler, highlight a subsurface low-salinity layer (≃50 m), as well as rising isopycnals, generated by geostrophic cyclonic circulation, in the structure's center. Reconstructed 3-D fields of density and horizontal velocities are used to estimate the vertical velocity field down to 250 m by applying the adiabatic QG-equation, for the first time in this region. The vertical motions are characterized by multipolar patterns of downward and upward velocities on the edges of the structure and significantly smaller vertical velocities in its center. Both the 3-D distribution of particles (size ≥100 μm), measured with a laser optical plankton counter, and the Synechococcus and Prochlorococcus abundances (cell per cubic meter) measured by flow cytometry are consistent with the 3-D velocity field. In particular, a secondary vertical recirculation is identified that upwells particles (from 250 to 100 m) along isohalines to the structure's center. Besides demonstrating the effect of vertical patterns on biogeochemical distributions, this case study suggests to use particle matter as a tracer to assess physical dynamics."],"format":["application/pdf"],"id":"50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795/document","https://hal-amu.archives-ouvertes.fr/hal-02124795/file/Vertical%20motions%20and%20their%20effects%20on%20a%20biogeochemical%20tracer%20in%20a%20cyclonic%20structure%20finely%20observed%20in%20the%20Ligurian%20Sea.pdf"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"license":"http://onlinelibrary.wiley.com/termsAndConditions#vor","publicationdate":"2019-04-23T07:08:10Z","refereed":"UNKNOWN","type":"Article","url":["https://onlinelibrary.wiley.com/doi/pdf/10.1029/2018JC014392","https://onlinelibrary.wiley.com/doi/full-xml/10.1029/2018JC014392","http://dx.doi.org/10.1029/2018jc014392"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"refereed":"UNKNOWN","type":"Article","url":["https://archimer.ifremer.fr/doc/00490/60214/63567.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"refereed":"UNKNOWN","type":"Article","url":["https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2018JC014392","https://hal-amu.archives-ouvertes.fr/hal-02124795v2","https://academic.microsoft.com/#/detail/2941945454"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},"hostedby":{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2019-06-07","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1029/2018jc014392"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"hostedby":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},"hostedby":{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},"hostedby":{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"hostedby":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"},"hostedby":{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"},"publicationdate":"2019-06-01","refereed":"UNKNOWN","type":"Other literature type","url":["https://archimer.ifremer.fr/doc/00490/60214/"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Vertical motions and their effects on a biogeochemical tracer in a cyclonic structure finely observed in the Ligurian Sea","originalId":["oai:HAL:hal-02124795v1","10.1029/2018JC014392","10.1029/2018jc014392","2941945454","oai:HAL:hal-02124795v2","oai:archimer.ifremer.fr:60214"],"pid":[{"scheme":"doi","value":"10.1029/2018JC014392"}],"projects":[{"acronym":"SEAQUEST","code":"624170","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::a2aa3e7ebdf06a249bbedc0cef87e602","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Meso and Sub-mesoscale Physico-biogeochemical Dynamics in a Coastal NW Mediterranean Sea: Quantifying and Understanding Ecosystem Structure and Transport"}],"publicationdate":"2019-04-23","publisher":"HAL CCSD","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"high‐resolution reconstructions of 3‐D fields"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"vertical velocities estimated with ω‐equation"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"particle distribution as a tracer for vertical advection"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"[SDU.OCEAN]Sciences of the Universe [physics]/Ocean, Atmosphere"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"high-resolution reconstructions of 3-D fields"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"vertical velocities estimated with omega-equation"}}],"type":"publication"} \ No newline at end of file +{"author":[{"fullname":"Rousselet, L.","name":"L.","pid":{"id":{"scheme":"orcid","value":"0000-0001-5016-3658"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":1,"surname":"Rousselet"},{"fullname":"Doglioli, A. M.","name":"A. M.","pid":{"id":{"scheme":"orcid","value":"0000-0003-1309-9954"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":2,"surname":"Doglioli"},{"fullname":"Verneil, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0002-8344-7953"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":3,"surname":"Verneil"},{"fullname":"Pietri, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0003-1111-9640"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":4,"surname":"Pietri"},{"fullname":"Della Penna, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0002-7579-3610"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":5,"surname":"Della Penna"},{"fullname":"Berline, L.","name":"L.","pid":{"id":{"scheme":"orcid","value":"0000-0002-5831-7399"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":6,"surname":"Berline"},{"fullname":"Marrec, P.","name":"P.","pid":{"id":{"scheme":"orcid","value":"0000-0002-7811-4150"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":7,"surname":"Marrec"},{"fullname":"Grégori, G.","name":"G.","rank":8,"surname":"Grégori"},{"fullname":"Thyssen, M.","name":"M.","rank":9,"surname":"Thyssen"},{"fullname":"Carlotti, F.","name":"F.","rank":10,"surname":"Carlotti"},{"fullname":"Barrillon, S.","name":"S.","rank":11,"surname":"Barrillon"},{"fullname":"Simon‐Bot, F.","name":"F.","rank":12,"surname":"Simon‐bot"},{"fullname":"Bonal, M.","name":"M.","pid":{"id":{"scheme":"orcid","value":"0000-0002-2077-3049"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":13,"surname":"Bonal"},{"fullname":"d'Ovidio, F.","name":"F.","rank":14,"surname":"D Ovidio"},{"fullname":"Petrenko, A.","name":"A.","rank":15,"surname":"Petrenko"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"}],"context":[{"code":"mes","label":"European Marine Science","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":["Institut méditerranéen d'océanologie (MIO) ; Centre National de la Recherche Scientifique (CNRS)-Université de Toulon (UTLN)-Aix Marseille Université (AMU)-Institut de Recherche pour le Développement (IRD)","New York University [Abu Dhabi] ; NYU System (NYU)","Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Muséum National d'Histoire Naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)-Institut national des sciences de l'Univers (INSU - CNRS)-Sorbonne Université (SU)-Centre National de la Recherche Scientifique (CNRS)","University of Washington [Seattle]","Université Libre de Bruxelles [Bruxelles] (ULB)","ANR-09-CEXC-0006,FOCEA,Excellence pour une synergie observation-modèle et l'évaluation de l'état écologique de la Méditerranée Nord Occidentale(2009)","ANR: 11-IDEX-0001,Amidex,INITIATIVE D'EXCELLENCE AIX MARSEILLE UNIVERSITE(2011)","ANR: 11-LABX-0061,OTMed,Objectif Terre : Bassin Méditerranéen(2011)","European Project: 624170,EC:FP7:PEOPLE,FP7-PEOPLE-2013-IEF,SEAQUEST(2015)","Institut méditerranéen d'océanologie (MIO) ; Institut de Recherche pour le Développement (IRD)-Aix Marseille Université (AMU)-Institut national des sciences de l'Univers (INSU - CNRS)-Université de Toulon (UTLN)-Centre National de la Recherche Scientifique (CNRS)","Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)","Université libre de Bruxelles (ULB)","Processus et interactions de fine échelle océanique (PROTEO) ; Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)-Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)","ANR-11-IDEX-0001-02/11-IDEX-0001,AMIDEX,AMIDEX(2011)"],"country":[{"code":"FR","label":"France","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2019-10-11T09:44:42.348Z","description":["International audience; Vertical velocities can be estimated indirectly from in situ observations by theoretical frameworks like the-equation. Direct measures of vertical exchanges are challenging due to their typically ephemeral spatiotemporal scales. In this study we address this problem with an adaptive sampling strategy coupling various biophysical instruments. We analyze the 3-D organization of a cyclonic mesoscale structure finely sampled during the Observing Submesoscale Coupling At High Resolution cruise in the Ligurian Sea during fall 2015. The observations, acquired with a moving vessel profiler, highlight a subsurface low-salinity layer (≃50 m), as well as rising isopycnals, generated by geostrophic cyclonic circulation, in the structure's center. Reconstructed 3-D fields of density and horizontal velocities are used to estimate the vertical velocity field down to 250 m by applying the adiabatic QG-equation, for the first time in this region. The vertical motions are characterized by multipolar patterns of downward and upward velocities on the edges of the structure and significantly smaller vertical velocities in its center. Both the 3-D distribution of particles (size ≥100 μm), measured with a laser optical plankton counter, and the Synechococcus and Prochlorococcus abundances (cell per cubic meter) measured by flow cytometry are consistent with the 3-D velocity field. In particular, a secondary vertical recirculation is identified that upwells particles (from 250 to 100 m) along isohalines to the structure's center. Besides demonstrating the effect of vertical patterns on biogeochemical distributions, this case study suggests to use particle matter as a tracer to assess physical dynamics."],"format":["application/pdf"],"id":"50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795/document","https://hal-amu.archives-ouvertes.fr/hal-02124795/file/Vertical%20motions%20and%20their%20effects%20on%20a%20biogeochemical%20tracer%20in%20a%20cyclonic%20structure%20finely%20observed%20in%20the%20Ligurian%20Sea.pdf"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"license":"http://onlinelibrary.wiley.com/termsAndConditions#vor","publicationdate":"2019-04-23T07:08:10Z","refereed":"UNKNOWN","type":"Article","url":["https://onlinelibrary.wiley.com/doi/pdf/10.1029/2018JC014392","https://onlinelibrary.wiley.com/doi/full-xml/10.1029/2018JC014392","http://dx.doi.org/10.1029/2018jc014392"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"refereed":"UNKNOWN","type":"Article","url":["https://archimer.ifremer.fr/doc/00490/60214/63567.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"refereed":"UNKNOWN","type":"Article","url":["https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2018JC014392","https://hal-amu.archives-ouvertes.fr/hal-02124795v2","https://academic.microsoft.com/#/detail/2941945454"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},"hostedby":{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2019-06-07","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1029/2018jc014392"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"hostedby":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},"hostedby":{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},"hostedby":{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"hostedby":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"},"hostedby":{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"},"publicationdate":"2019-06-01","refereed":"UNKNOWN","type":"Other literature type","url":["https://archimer.ifremer.fr/doc/00490/60214/"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Vertical motions and their effects on a biogeochemical tracer in a cyclonic structure finely observed in the Ligurian Sea","originalId":["oai:HAL:hal-02124795v1","10.1029/2018JC014392","10.1029/2018jc014392","2941945454","oai:HAL:hal-02124795v2","oai:archimer.ifremer.fr:60214"],"pid":[{"scheme":"doi","value":"10.1029/2018JC014392"}],"projects":[{"acronym":"SEAQUEST","code":"624170","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::a2aa3e7ebdf06a249bbedc0cef87e602","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Meso and Sub-mesoscale Physico-biogeochemical Dynamics in a Coastal NW Mediterranean Sea: Quantifying and Understanding Ecosystem Structure and Transport"}],"publicationdate":"2019-04-23","publisher":"HAL CCSD","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"high‐resolution reconstructions of 3‐D fields"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"vertical velocities estimated with ω‐equation"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"particle distribution as a tracer for vertical advection"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"[SDU.OCEAN]Sciences of the Universe [physics]/Ocean, Atmosphere"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"high-resolution reconstructions of 3-D fields"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"vertical velocities estimated with omega-equation"}}],"type":"publication"} +{"author":[{"fullname":"Sanda Pletikosic","rank":1},{"fullname":"Mladenka Tkalčić","rank":2}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::2e3d2c4f33a7a1f58bc6c81cacd21e9c","value":"Repository of the University of Rijeka, Faculty of Humanities and Social Sciences"},{"key":"10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0","value":"HRČAK - Portal of scientific journals of Croatia"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"contributor":[],"country":[{"code":"HR","label":"Croatia","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2020-05-09T12:33:14.336Z","description":["Irritable bowel syndrome is regarded as a biopsychosocial disorder, the result of a complex combination of predisposing, precipitating and perpetuating factors. Personality traits, affective status and stress are some of the relevant factors contributing to lower quality of life and symptom exacerbation in IBS patients. In order to examine the role of stress in IBS symptom exacerbation, the aims of this study were to explore the relationship of daily stressful events and symptom severity in a prospective manner and to explore the roles of neuroticism, anxiety, depression and stress in the vicious circle of symptom perpetuation. A total of 49 patients with IBS reported their symptom severity and daily stressful events intensity each day for 14 consecutive days. They also completed the Big five personality inventory, the Beck Depression Inventory and the State-trait anxiety inventory. Cross-correlation analyses were performed on the time series data for daily stress and symptom severity for each participant separately. Four different patterns of relationships were found in different subgroups of participants: positive cross-correlations of symptom severity and stress intensity on the same day; higher symptom severity on days following stressful days; lower symptom severity on days following stressful days; and lower stress intensity on days following severe symptoms. Using average scores for daily stress and symptom severity, as well as scores for neuroticism, anxiety and depression, we performed a path analysis to test a model of symptom exacerbation. It showed that, on the group level, average stress intensity predicts average symptom severity. Neuroticism and anxiety were not significant predictors of symptom severity, while depression showed a marginally significant relationship with symptom severity, mediated by stress intensity. In conclusion, depression and daily stress seem to be important contributors to the vicious circle of IBS symptom perpetuation.","Síndrome del intestino irritable se considera un trastorno biopsicosocial, el resultado de la combinación compleja de factores predisponentes, precipitantes y perpetuos. Rasgos de personalidad, estado afectivo y estrés son algunos de los factores relevantes que contribuyen a la calidad de vida más baja y la exacerbación sintomática de los pacientes con SII. Para examinar el papel del estrés en la exacerbación sintomática de SII, el objetivo de este estudio era explorar la relación entre los acontecimientos cotidianos estresantes y la intensidad de los síntomas de forma prospectiva, tanto como explorar los papeles del neuroticismo, ansiedad, depresión y estrés en el círculo vicioso de perpetuación de los síntomas. Todos los 49 pacientes con SII informaron sobre la intensidad de los síntomas y la intensidad de los acontecimientos cotidianos estresantes cada día durante 14 días consecutivos. Además, completaron el Test de personalidad de los cinco grandes, Inventario de depresión de Beck e Inventario de ansiedad estado-rasgo. En los datos de series temporales se hizo el análisis de correlación cruzada para la intensidad del estrés diario y de los síntomas para cada participante por separado. Se encontraron cuatro patrones de relación diferentes en diferentes subgrupos de participantes: correlación cruzada positiva de la intensidad de los síntomas y del estrés el mismo día; intensidad de los síntomas más alta en los días después de los días estresantes; intensidad de los síntomas más baja en los días después de los días estresantes e intensidad del estrés más baja en los días después de los síntomas graves. Usando resultados promedios para el estrés diario y la intensidad de los síntomas, tanto como resultados para el neuroticismo, ansiedad y depresión, realizamos un análisis del camino para examinar el modelo de la exacerbación de los síntomas. Mostró que, al nivel de grupo, la intensidad del estrés promedia predice la intensidad de los síntomas promedia. Neuroticismo y ansiedad no eran predictores significativos de la intensidad de los síntomas, mientras que la depresión demostró una relación marginalmente significante con la intensidad de los síntomas, mediada por la intensidad del estrés. En conclusión, la depresión y el estrés diario parecen ser contribuidores importantes al círculo vicioso de la perpetuación de los síntomas SII."],"format":["application/pdf"],"id":"50|dedup_wf_001::054ea3bc3ccc386d150d900776e1070d","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::2e3d2c4f33a7a1f58bc6c81cacd21e9c","value":"Repository of the University of Rijeka, Faculty of Humanities and Social Sciences"},"hostedby":{"key":"10|opendoar____::2e3d2c4f33a7a1f58bc6c81cacd21e9c","value":"Repository of the University of Rijeka, Faculty of Humanities and Social Sciences"},"license":"http://creativecommons.org/licenses/by/4.0/","publicationdate":"2016-04-01","refereed":"UNKNOWN","type":"Article","url":["https://repository.ffri.uniri.hr/islandora/object/ffri:1182","https://urn.nsk.hr/urn:nbn:hr:186:701987","https://repository.ffri.uniri.hr/islandora/object/ffri:1182/datastream/FILE0"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0","value":"HRČAK - Portal of scientific journals of Croatia"},"hostedby":{"key":"10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0","value":"HRČAK - Portal of scientific journals of Croatia"},"publicationdate":"2016-01-01","refereed":"UNKNOWN","type":"Other literature type","url":["https://hrcak.srce.hr/156331","https://hrcak.srce.hr/file/230444"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::23738aeeb586d9f9b652c75aaff54dd4","value":"Psychological Topics"},"publicationdate":"2016-04-01","refereed":"UNKNOWN","type":"Article","url":["http://pt.ffri.hr/index.php/pt/article/view/317","https://doaj.org/toc/1332-0742"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"The role of stress in IBS symptom severity","originalId":["oai:repository.ffri.uniri.hr:ffri_1182","oai:hrcak.srce.hr:156331","oai:doaj.org/article:4394c5a4f62849ffac7ea485a91540a3"],"pid":[],"projects":[{"code":"009-0092660-2655","funder":{"jurisdiction":"HR","name":"Ministry of Science, Education and Sports of the Republic of Croatia (MSES)","shortName":"MZOS"},"id":"40|irb_hr______::ad496254144c27606e4058841e060fa0","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Psychosomatic aspects of chronic functional and inflammatory bowel diseases"}],"publicationdate":"2016-04-01","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"irritable bowel syndrome"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"symptom severity"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"daily stress"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"depression"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"DRUŠTVENE ZNANOSTI. Psihologija."}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"SOCIAL SCIENCES. Psychology."}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"irritable bowel syndrome; symptom severity; daily stress; depression"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"síndrome del intestino irritable; intensidad de los síntomas; estrés diario; depresión"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Psychology"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"BF1-990"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7632"},"subject":{"scheme":"mesheuropmc","value":"macromolecular substances"}}],"type":"publication"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation deleted file mode 100644 index f88678437..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation +++ /dev/null @@ -1,9 +0,0 @@ -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|cihr________::1e5e62235d094afd01cd56e65112fc63"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"40|nwo_________::dc69ada721bf21ed51055b6421850d73"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"40|nih_________::031bb5f2f70239b3210eda38b2079f67"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"40|nsf_________::03748bcb5d754c951efec9700e18a56d"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"40|snsf________::50cb15ff7a6a3f8531f063770179e346"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|irb_hr______::1e5e62235d094afd01cd56e65112fc63"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/project b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/project new file mode 100644 index 000000000..61a912587 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/project @@ -0,0 +1,12 @@ +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Fotoniikka ja modernit kuvantamismenetelmät LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"135027"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"190,000 €"},"originalId":["aka_________::135027"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Academy Project Funding TT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"316061"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"450,000 €"},"originalId":["aka_________::316061"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population."},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MOISE"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-17-CE05-0033"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-17-CE05-0033"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS"},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"GALAXY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-09-SEGI-0005"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-09-SEGI-0005"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES"},"totalcost":0.0} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0347462"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0347462"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Femtosecond laser micromachining facility"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0347462"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LP140100567"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2017-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Projects\n Linkage Projects\n Linkage Projects\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LP140100567"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Linkage Projects - Grant ID: LP140100567"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LP140100567"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DP180101235"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-04-02","dateoftransformation":"2019-09-02","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2023-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Discovery Projects\n Discovery Projects\n Discovery Projects\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::DP180101235"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2018-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Discovery Projects - Grant ID: DP180101235"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/DP180101235"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0989831"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0989831"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0989831"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"3120023"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"IBACACHE ROJAS, JUANA"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-28"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::POSTDOCTORADOFondecyt stream, POSTDOCTORADOFondecyt stream, POSTDOCTORADOconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::3120023"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2011-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/183109"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1040240"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ROMERO FIGUEROA, JULIO"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2007-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1040240"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2004-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/163340"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1020683"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MANRIQUEZ CASTRO, VICTOR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2006-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1020683"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2002-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/162452"}} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANIM"},"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"FP7-PEOPLE-2013-IIF"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"628405"},"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"M.D.Davies@bristol.ac.uk"},"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Davies, Maria"},"contactphone":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"+44 117 3317352"},"contracttype":{"classid":"MC","classname":"Support for training and career development of researchers (Marie Curie)","schemeid":"ec:FP7contractTypes","schemename":"ec:FP7contractTypes"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-26","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2016-04-30"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ec__________::ECECEuropean CommissionEUec__________::EC::FP7::SP3::PEOPLEMarie-Curie ActionsPEOPLEec:programec__________::EC::FP7::SP3SP3-PeopleSP3ec:specificprogramec__________::EC::FP7SEVENTH FRAMEWORK PROGRAMMEFP7ec:frameworkprogram"}],"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MC-IIF"},"optional2":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"https://ec.europa.eu/research/participants/portal/page/call_FP7#wlp_call_FP7"},"originalId":["corda_______::628405"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-05-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers"},"totalcost":0.0} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relation similarity index 95% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json rename to dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relation index 6478a01ae..fe14fccf6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relation @@ -22,8 +22,8 @@ {"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::cafe7980294aa5f935f433e7c8aab844","subRelType":"provision","target":"20|dedup_wf_001::2806db65ba8029ee196679cad067eff2"} {"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"} {"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"} -{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1603564783812,"properties":[],"relClass":"isProducedBy","relType":"resultOrganization","target":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","source":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585"} -{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::645123c3fe7bab557c36f0f9bb02a4cd"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1603564783812,"properties":[],"relClass":"isProducedBy","relType":"resultOrganization","target":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","subRelType":"affiliation","source":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1603564783812,"properties":[],"relClass":"isProducedBy","relType":"resultOrganization","target":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","subRelType":"affiliation","source":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585"} {"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::fecf4f862a6b40dd2ccb1abc8fed5bc5"} {"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::68ce7288b1b036f73a1ff951c6524eba"} {"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::b733217d1cd609001dd3c75af419d872"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/project b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/project new file mode 100644 index 000000000..61a912587 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/project @@ -0,0 +1,12 @@ +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Fotoniikka ja modernit kuvantamismenetelmät LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"135027"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"190,000 €"},"originalId":["aka_________::135027"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Academy Project Funding TT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"316061"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-01-25","dateoftransformation":"2019-04-16","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"450,000 €"},"originalId":["aka_________::316061"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population."},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MOISE"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-17-CE05-0033"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-17-CE05-0033"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS"},"totalcost":0.0} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"GALAXY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANR-09-SEGI-0005"},"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2019-12-24","dateoftransformation":"2020-01-07","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n anr_________::ANR\n ANR\n French National Research Agency (ANR)\n Agence Nationale de la Recherche\n FR\n "}],"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["anr_________::ANR-09-SEGI-0005"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE PAR LES MODELES"},"totalcost":0.0} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0347462"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0347462"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2003-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Femtosecond laser micromachining facility"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0347462"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LP140100567"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2017-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Projects\n Linkage Projects\n Linkage Projects\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LP140100567"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Linkage Projects - Grant ID: LP140100567"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LP140100567"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"DP180101235"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-04-02","dateoftransformation":"2019-09-02","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2023-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Discovery Projects\n Discovery Projects\n Discovery Projects\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::DP180101235"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2018-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Discovery Projects - Grant ID: DP180101235"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/DP180101235"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LE0989831"},"collectedfrom":[{}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2015-08-24","dateoftransformation":"2018-11-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-12-31"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"\n \n arc_________::ARC\n ARC\n Australian Research Council (ARC)\n AU\n \n \n arc_________::ARC::Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n Linkage Infrastructure, Equipment and Facilities\n \n arc:fundingStream\n \n "}],"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"keywords":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["arc_________::LE0989831"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2009-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://purl.org/au-research/grants/arc/LE0989831"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"3120023"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"IBACACHE ROJAS, JUANA"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-01-28"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::POSTDOCTORADOFondecyt stream, POSTDOCTORADOFondecyt stream, POSTDOCTORADOconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::3120023"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2011-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS"},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/183109"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1040240"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ROMERO FIGUEROA, JULIO"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2007-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1040240"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2004-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/163340"}} +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"1020683"},"collectedfrom":[{"key":"10|openaire____::d1235f66ad3adbcf6c8faf35fa490885","value":"Comisión Nacional de Investigación Científica y Tecnológica"}],"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MANRIQUEZ CASTRO, VICTOR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2017-09-11","dateoftransformation":"2018-09-28","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2006-01-15"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"conicytf____::CONICYTCONICYTComisión Nacional de Investigación Científica y TecnológicaCLconicytf____::CONICYT::FONDECYT::REGULARFondecyt stream, REGULARFondecyt stream, REGULARconicyt:fondecytfundingsconicytf____::CONICYT::FONDECYTFONDECYTFondecyt fundingsconicyt:fondecytfundings"}],"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"originalId":["conicytf____::1020683"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2002-01-15"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION."},"totalcost":0.0,"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"http://repositorio.conicyt.cl/handle/10533/162452"}} +{"acronym":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ANIM"},"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"FP7-PEOPLE-2013-IIF"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"628405"},"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"M.D.Davies@bristol.ac.uk"},"contactfullname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Davies, Maria"},"contactphone":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"+44 117 3317352"},"contracttype":{"classid":"MC","classname":"Support for training and career development of researchers (Marie Curie)","schemeid":"ec:FP7contractTypes","schemename":"ec:FP7contractTypes"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-26","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2016-04-30"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"ec__________::ECECEuropean CommissionEUec__________::EC::FP7::SP3::PEOPLEMarie-Curie ActionsPEOPLEec:programec__________::EC::FP7::SP3SP3-PeopleSP3ec:specificprogramec__________::EC::FP7SEVENTH FRAMEWORK PROGRAMMEFP7ec:frameworkprogram"}],"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"{}"},"lastupdatetimestamp":1594398578323,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"MC-IIF"},"optional2":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"https://ec.europa.eu/research/participants/portal/page/call_FP7#wlp_call_FP7"},"originalId":["corda_______::628405"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"2014-05-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers"},"totalcost":0.0} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relation similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json rename to dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relation diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated new file mode 100644 index 000000000..e0e03fc63 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated @@ -0,0 +1,30 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"50|dedup_wf_001::2899e571609779168222fdeb59cb916d", "validated":true, "validationDate":"2021-08-06"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"40|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"50|dedup_wf_001::396262ee936f3d3e26ff0e60bea6cae0", "validated":true, "validationDate":"2021-08-06"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"20|doajarticles::03748bcb5d754c951efec9700e18a56d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"20|doajarticles::50cb15ff7a6a3f8531f063770179e346"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"20|doajarticles::ffc1811633b3222e4764c7b0517f83e8"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"20|dedup_wf_001::4e6c928fef9851b37ec73f4f6daca35b"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"20|dedup_wf_001::846b777af165fef7c904a81712a83b66"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::fbf7592ddbf2ad3cc0ed70c0f2e1d67c","subRelType":"provision","target":"20|dedup_wf_001::1b965e2c0c53e5526d269d63bcfa0ae6"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::fd4c399077127f0ba09b5205e2b78406","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|infrastruct_::f66f1bd369679b5b077dcdf006089556","value":"OpenAIRE"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1594398578323,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|openaire____::8f991165fae922e29ad55d592f568464","subRelType":"provision","target":"20|openaire____::ec653e804967133b9436fdd30d3ff51d"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::15231a7ce4ba789d13b722cc5c955834","subRelType":"provision","target":"20|dedup_wf_001::1ea4bcb1bae8c6befef1e7f1230f0f10"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::16d11e9595188dbad0418a85f0351aba","subRelType":"provision","target":"20|opendoar____::041abd8c990fc531ab9bd2674a0e2725"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::46d3f6029f6170ebccb28945964d09bf","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::7501e5d4da87ac39d782741cd794002d","subRelType":"provision","target":"20|dedup_wf_001::04e2c34ef4daa411ff2497afc807b612"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::75b9b6dc7fe44437c6e0a69fd863dbab","subRelType":"provision","target":"20|dedup_wf_001::ad30fbc9b3b6f5370e59e58c456b7e19"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::d35b05a832e2bb91f110d54e34e2da79","subRelType":"provision","target":"20|opendoar____::589618708434cfc5b830601ac4b339ee"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::fc2e6a440b94f64831840137698021e1","subRelType":"provision","target":"20|opendoar____::a82c7e358792e0018235b7f196fec4ed"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::48b3cc4b8c5951621730829c60b1c205","subRelType":"provision","target":"20|dedup_wf_001::5bc6fca7649010470f1cc11f6675ffb3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::8e7a079ee6d2d4933db9b898c789c2f8","subRelType":"provision","target":"20|dedup_wf_001::0b0ae5b38aedc082a8b089abdf3b752f"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::b26c86eba2b3ad8b242a93b581e6ec8e","subRelType":"provision","target":"20|re3data_____::e841a40265d8d0a6739ac71f56328da3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::cafe7980294aa5f935f433e7c8aab844","subRelType":"provision","target":"20|dedup_wf_001::2806db65ba8029ee196679cad067eff2"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1595258695262,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","target":"50|dedup_wf_001::02859c30f6c8bfbdd8c427068a6ec684"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::645123c3fe7bab557c36f0f9bb02a4cd"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::fecf4f862a6b40dd2ccb1abc8fed5bc5"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::b733217d1cd609001dd3c75af419d872"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::c33dee8231ad0374caf93e52c5a473e5"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::ddcb6d65425e4531a11c610488d42d81","subRelType":"participation","target":"40|aka_________::4d7027ac28c8ac9cc72b062ac4992b4e"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance new file mode 100644 index 000000000..92d7e53c0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/project b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/project new file mode 100644 index 000000000..2c234e939 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/project @@ -0,0 +1,23 @@ +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","originalId":["aka_________::123455"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"123455","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Yleiset tutkimusmäärärahat BY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"144,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::03376222b28a3aebf2730ac514818d04","originalId":["aka_________::119027"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"119027","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"EGFR Tyrosine Kinase Inhibitors and LKB1 Tumor Suppressor in Non-Small-Cell Lung Cancer","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Researcher training and research abroad TT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"ec__________::ECECEuropean CommissionEUec__________::EC::H2020::CS2-RIAResearch and Innovation actionCS2-RIAec:h2020toasec__________::EC::H2020H2020Horizon 2020 Framework Programmeec:h2020fundings","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"13,400 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::03f9d81b498caa15a2f7c63e5d8f2a19","originalId":["aka_________::215287"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"215287","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Finnish investigations and transloation of M.A. Sholohov's work","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Tutkijainvaihto Suomeen KY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"1,060 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::04abf8df3fabc97e514155ca9bfbc506","originalId":["aka_________::206360"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"206360","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"The Neural Modelling of Speech Processing in the Human Brain","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"General application for research appropriations KY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"140,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::069059f20d432626d44ece1f5046d1ab","originalId":["aka_________::104871"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"104871","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Liikkeen näkemisen biofysiikka ja matemaattinen mallintaminen","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"General application for research appropriations LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"120,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::07773d0048e3496021cfc4e9b196d9ab","originalId":["aka_________::78827"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"78827","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Language and social action: a four- nation comparative study of affiliation and disaffiliation in Danish, English, Finnish, German and Swedish","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Hankerahoitusyhteistyö yhteiskuntatieteellisillä aloilla KY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"32,611 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::07872f3ac63f20519430b9b8766501b0","originalId":["aka_________::312514"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"312514","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Molecular polyhedra and metal clusters in gas phase: stuructures, properties and supramolecular catalysis in confined space","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Academy Research Fellows: follow-on funding for research costs, invited applicants only LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"140,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::0b97302e25b0663b527ba0a87a3e40df","originalId":["aka_________::260014"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"260014","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Picking Digital Pockets / Consortium: PDP","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Academy project LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"372,624 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::0cc726a7d4635c97d00884a62413011e","originalId":["aka_________::307856"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"307856","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Community defence responses in food spoilage lactic acid bacteria, from restriction modification to complex interactomes in modern foods / Consortium: CODELAB","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Academy Project Funding BY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"350,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::0ec1d898c0231d74800ab835a0043364","originalId":["aka_________::114035"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"114035","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Self-Extending Lexicons using Analogical Learning","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Postdoctoral researcher's project KY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"180,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::0ec32fb5c52c82ac61e2e5675da3873f","originalId":["aka_________::135344"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"135344","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Advanced topics in Samoyed linguistics","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Research costs of Academy Research Fellows 2009 KY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"39,440 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::11f2cadcda5de9a675504990a45446ca","originalId":["aka_________::206739"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"206739","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Transformation groups","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"General application for research appropriations LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"120,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::1e1bb730923395e83c52daa77fcaeebe","originalId":["aka_________::129479"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"129479","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"The health effects of a diet rich in plant-based foods and fish. Focus on Nordic food. / Consortium: NORRDIET","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"SALVE TT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"363,480 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::22cf08edbe21df883cef311dbb1b2113","originalId":["aka_________::283501"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"283501","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Finnish Grid and Cloud Infrastructure (FGCI) / Consortium: FGCI","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"FIRI 2014 research infrastructure call TIK","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"69,305 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::24bdeefc4cf283b069e5156c19ef0402","originalId":["aka_________::109514"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"109514","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Modeling of Biomolecular Systems: From Nanoengineering Applications to Lipid/Protein Complexes","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Kevät/Tutkijankoulutus ja tutkijoiden työskentely ulkomailla LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"18,800 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::262464c5012372b2b9ed50b67f2cdcb3","originalId":["aka_________::202051"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"202051","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Testing of probiotic strains with bacteriocin production to prevent or to cure diseases of digestive system.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Researcher exchange to Finland BY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"1,620 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::2719be7b6ed0ad59ee90ec06dc7a290d","originalId":["aka_________::77374"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"77374","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Electrophoretic investigation of humic substances from polluted and prastine soils of Russia and Finlan d","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Tutkijanvaihto ulkomaille /kevät BY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"824 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::295b5d1a1f98d2c6c64569ed889378d7","originalId":["aka_________::309990"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"309990","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"H2.BIO: A second generation enzyme-driven hydrogen production system","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Mobility from Finland LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"17,600 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::2ff5531f5e0ac4882a383d9cce6d3720","originalId":["aka_________::275608"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"275608","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Defining unknown reactivity in the ambient air of Boreal and Arctic environments","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Academy Research Fellow BY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"434,485 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::32e954352127293334ab293adacdb5b7","originalId":["aka_________::208079"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"208079","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Invitation of 19 Russian researchers to Finland","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Work of foreign researcher´s in Finland LT","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"21,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::346f122f82fb83e1d70ef509460d0850","originalId":["aka_________::319347"],"pid":[],"dateofcollection":"2019-01-23","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"319347","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"The Lobbyist. A Socio-Legal Inquiry of Interest Representation in the EU","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Academy Research Fellows: initial funding for research costs, invited applicants only KY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"37,452 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::3814ce3e74c3317a2cc50c995fe12824","originalId":["aka_________::211987"],"pid":[],"dateofcollection":"2018-03-21","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"211987","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"GSForest/Graduate School in Forest Sciences","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"Subsidy to graduate schools and national researcher training courses BY","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"98,000 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1590964602807,"id":"40|aka_________::383058c90d19b8a47700360ee9739457","originalId":["aka_________::320207"],"pid":[],"dateofcollection":"2019-01-24","dateoftransformation":"2020-03-05","extraInfo":[],"oaiprovenance":null,"websiteurl":null,"code":{"value":"320207","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"acronym":null,"title":{"value":"Circular Economy Catalysts: From Innovation to Business Ecosystems / Consortium: CICAT2025","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"startdate":null,"enddate":null,"callidentifier":{"value":"SRC 2018 Keys to Sustainable Growth, supplementary call, second stage STN","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"keywords":null,"duration":{"value":"0","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsc39":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"oamandatepublications":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecarticle29_3":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"subjects":[],"fundingtree":[{"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n ","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contracttype":null,"optional1":{"value":"536,503 €","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"optional2":null,"jsonextrainfo":{"value":"{}","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900000000000000022","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"contactfullname":null,"contactfax":null,"contactphone":null,"contactemail":null,"summary":null,"currency":null,"totalcost":0.0,"fundedamount":0.0} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/relation new file mode 100644 index 000000000..d829e66ab --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels/relation @@ -0,0 +1,8 @@ +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb","validated":true, "validationDate":"2021-08-06"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::03376222b28a3aebf2730ac514818d04","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb","validated":true, "validationDate":"2021-08-05"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80","validated":true, "validationDate":"2021-08-04"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80","validated":true, "validationDate":"2021-08-03"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e","validated":true, "validationDate":"2021-08-02"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e","validated":true, "validationDate":"2021-08-01"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf","validated":true, "validationDate":"2021-07-31"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf","validated":true, "validationDate":"2021-07-30"} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 433c88093..3d6c3b0b7 100644 --- a/pom.xml +++ b/pom.xml @@ -741,7 +741,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.15] + 2.7.17-SNAPSHOT [4.0.3] [6.0.5] [3.1.6] From eff499af9f869172ce1b63814f42dc4ce0fd0ac7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:12:30 +0200 Subject: [PATCH 146/287] added new tests and changed the test example --- .../dhp/oa/graph/dump/DumpJobTest.java | 56 ++++++++++++++++++- .../resultDump/publication_extendedinstance | 2 +- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 39454192e..d7c247499 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -308,6 +308,60 @@ public class DumpJobTest { //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) + } + + @Test + public void testPublicationExtendedInstance2Community() throws JsonProcessingException { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run(false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + + Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count()); + + //the common fields in the result have been checked with the test below. Now checking only + // community specific fields + + CommunityResult cr = verificationDataset.first(); + + Assertions.assertEquals(1, cr.getContext().size()); + Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode()); + Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel()); + Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size()); + Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance()); + Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust()); + + Assertions.assertEquals(1, cr.getCollectedfrom().size()); + Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey()); + Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue()); + + Assertions.assertEquals(1, cr.getInstance().size()); + Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getInstance().get(0).getCollectedfrom().getKey()); + Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue()); + Assertions.assertEquals("10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey()); + Assertions.assertEquals("One Ecosystem",cr.getInstance().get(0).getHostedby().getValue()); + + } @Test @@ -337,7 +391,6 @@ public class DumpJobTest { .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(1, verificationDataset.count()); - verificationDataset.show(false); GraphResult gr = verificationDataset.first(); @@ -472,6 +525,7 @@ public class DumpJobTest { .get(ModelConstants.ACCESS_RIGHT_OPEN))); Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN)); Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green)); + Assertions.assertTrue(instance.getType().equals("Article")); Assertions.assertEquals(2, instance.getUrl().size()); Assertions.assertTrue(instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718") && instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance index 92d7e53c0..043be7b1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance @@ -1 +1 @@ -{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"measures":[{"id": "influence", "unit": [{"value": "1.62759106106e-08", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "0.22519296", "key": "score"}]}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}} \ No newline at end of file From b7079804cb4e55b1d027c5e1857bfc8f000055d7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:34:35 +0200 Subject: [PATCH 147/287] CrossrefDump - put token among the parameters --- .../doiboost/crossref_dump_reader/oozie_app/workflow.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 2d3ad6bc3..6e4f17912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -22,6 +22,10 @@ crossrefDumpPath the Crossref dump path + + crossrefdumptoken + the token for the API dump path + @@ -37,7 +41,7 @@ - + @@ -69,6 +73,7 @@ ${url} ${crossrefDumpPath} ${crossrefdumpfilename} + ${crossrefdumptoken} HADOOP_USER_NAME=${wf:user()} download.sh From 54a6cbb24497ae4aa52b93a33f961ce6d571bcfd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:41:10 +0200 Subject: [PATCH 148/287] CrossrefDump - put token among the parameters --- .../dhp/doiboost/crossref_dump_reader/oozie_app/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh index 662f5313c..2ce704283 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put $2/$3 \ No newline at end of file +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put $2/$3 \ No newline at end of file From 964f97ed4d16db8a95555a7051f1a2d18181c418 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:06 +0200 Subject: [PATCH 149/287] cleanup --- dhp-workflows/pom.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index cfdf36573..22ee77619 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -24,8 +24,6 @@ dhp-dedup-openaire dhp-enrichment dhp-graph-provision - - dhp-blacklist dhp-stats-update dhp-stats-promote From da20fceaf7aefbee08844e9257b47ba97e5e2961 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:53:45 +0200 Subject: [PATCH 150/287] removed all the part related to the crossref dump download since it is done in a separate workflow --- .../generate_dataset_params.json | 21 --- .../oozie_app/config-default.xml | 42 ------ .../oozie_app/workflow.xml | 118 ----------------- .../oozie_app/config-default.xml | 42 ------ .../downloadandunpack/oozie_app/download.sh | 2 - .../downloadandunpack/oozie_app/mock.sh | 2 - .../downloadandunpack/oozie_app/workflow.xml | 121 ------------------ .../doiboost/preprocess/oozie_app/download.sh | 2 - .../preprocess/oozie_app/workflow.xml | 76 +---------- 9 files changed, 2 insertions(+), 424 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json deleted file mode 100644 index 63e080337..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", - "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml deleted file mode 100644 index 508202e30..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml deleted file mode 100644 index 506d86a08..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ /dev/null @@ -1,118 +0,0 @@ - - - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - 2 - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz - --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml deleted file mode 100644 index 508202e30..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh deleted file mode 100644 index 1bb7aff1f..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh deleted file mode 100644 index 30386d613..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml deleted file mode 100644 index 91de3bfb3..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml +++ /dev/null @@ -1,121 +0,0 @@ - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - crossrefdumpfilename - the Crossref input path - - - crossrefDumpPath - the Crossref dump path - - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - ${jobTracker} - ${nameNode} - - - mapred.job.queue.name - ${queueName} - - - download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - HADOOP_USER_NAME=${wf:user()} - download.sh - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefdumpfilename} - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh deleted file mode 100644 index 30386d613..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index ecaeda709..3700ce5d9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,14 +63,10 @@ - ${wf:conf('resumeFrom') eq 'Skip'} - ${wf:conf('resumeFrom') eq 'ImportCrossRef'} - ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} - ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -79,67 +75,6 @@ - - - ${jobTracker} - ${nameNode} - - - mapred.job.queue.name - ${queueName} - - - download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - download.sh - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefdumpfilename} - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - yarn-cluster @@ -166,14 +101,7 @@ - - - - - - - - + From 577f3b1ac8696abf405de9969f4efbd81d8bba95 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:58 +0200 Subject: [PATCH 151/287] added dnet workflows responsible for the graph construction, enrichment, provision --- .../actionset_bipFinderScores.xml | 100 ++ .../dhp/actionmanager/actionset_datacite.xml | 144 +++ .../dhp/actionmanager/actionset_doiboost.xml | 200 ++++ .../actionset_h2020_classification.xml | 132 +++ .../actionset_orcidworks-no-doi.xml | 101 ++ .../dhp/actionmanager/actionset_ror.xml | 89 ++ .../dhp/provision/00_beta_graph_for_IIS.xml | 628 +++++++++++ .../dhp/provision/00_prod_graph_for_IIS.xml | 437 ++++++++ .../eu/dnetlib/dhp/provision/01_IIS.xml | 225 ++++ .../dnetlib/dhp/provision/02_beta_graph.xml | 995 ++++++++++++++++++ .../dnetlib/dhp/provision/02_prod_graph.xml | 778 ++++++++++++++ .../dnetlib/dhp/provision/03_graph2hive.xml | 74 ++ .../dnetlib/dhp/provision/04_graph2solr.xml | 99 ++ .../dnetlib/dhp/provision/05_graph2stats.xml | 100 ++ .../dhp/provision/06_publish_stats.xml | 87 ++ .../eu/dnetlib/dhp/provision/07_broker.xml | 131 +++ 16 files changed, 4320 insertions(+) create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml new file mode 100644 index 000000000..e4680a0cf --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Import bipFinder scores + Import bipFinder scores + 30 + + + declares the path holding the BIP SCORE data + + bipScorePath + /data/bip/20201206 + + + + + + + declares the path holding the LATEST GRAPH dump + + latestGraphPath + /tmp/stable_ids/graph/14_graph_blacklisted + + + + + + + prepare action sets + + + [ + { + 'set' : 'bipfinder-scores', + 'jobProperty' : 'export_action_set_bipfinder-scores', + 'enablingProperty' : 'active_bipfinder-scores', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare AS for the bipFinder scores integration + + executeOozieJob + IIS + + { + 'bipScorePath':'bipScorePath', + 'inputPath':'latestGraphPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bipfinder' + } + + build-report + + + + + + + update action sets + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml new file mode 100644 index 000000000..d2ea9d35f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml @@ -0,0 +1,144 @@ + +
+ + + + + +
+ + Import Datacite ActionSet + Import InfoSpace + 30 + + + set the resume from + + resumeFrom + TransformDatacite + + + + + + + shall the datacite mapping produce the links? + + exportLinks + false + + + + + + + set the path storing the OAF Datacite records + + oafTargetPath + /data/datacite/production/datacite_oaf + + + + + + + set the input path for Datacite content + + datacitePath + /data/datacite + + + + + + + prepare action sets + + + [ + { + 'set' : 'datacite', + 'jobProperty' : 'export_action_set_datacite', + 'enablingProperty' : 'active_datacite', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'mainPath' : 'datacitePath', + 'oafTargetPath' : 'oafTargetPath', + 'exportLinks' : 'exportLinks', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'sourcePath' : 'oafTargetPath', + 'outputPath' : 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210723_163342_752 + 2021-07-23T16:44:05+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml new file mode 100644 index 000000000..ce9eb8f4c --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml @@ -0,0 +1,200 @@ + +
+ + + + + +
+ + Import DOIboost + Import InfoSpace + 30 + + + set the input path for MAG + + MAGDumpPath + /data/doiboost/mag-2021-02-15 + + + + + + + set the input path for CROSSREF dump + + crossrefDumpPath + /data/doiboost/crossref/ + + + + + + + set the intermediate path used to process MAG + + intermediatePathMAG + /data/doiboost/input/mag + + + + + + + set the input path for Crossref + + inputPathCrossref + /data/doiboost/input/crossref + + + + + + + set the timestamp for the Crossref incremental harvesting + + crossrefTimestamp + 1607614921429 + + + + + + + set the input path for UnpayWall + + inputPathUnpayWall + /data/doiboost/input/unpayWall + + + + + + + set the input path for ORCID + + inputPathOrcid + /data/orcid_activities_2020/last_orcid_dataset + + + + + + + set the working path for ORCID + + workingPathOrcid + /data/doiboost/input/orcid + + + + + + + set the hostedBy map path + + hostedByMapPath + /data/doiboost/input/hostedBy/hbMap.gz + + + + + + + set the oozie workflow name from which the execution will be resumed + + resumeFrom + ConvertCrossrefToOAF + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'doiboost', + 'jobProperty' : 'export_action_set_doiboost', + 'enablingProperty' : 'active_doiboost', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of DOIBoost + + executeOozieJob + IIS + + { + 'crossrefTimestamp' : 'crossrefTimestamp', + 'hostedByMapPath' : 'hostedByMapPath', + 'MAGDumpPath' :'MAGDumpPath', + 'inputPathMAG' : 'intermediatePathMAG', + 'inputPathCrossref' : 'inputPathCrossref', + 'crossrefDumpPath':'crossrefDumpPath', + 'inputPathUnpayWall' : 'inputPathUnpayWall', + 'inputPathOrcid' : 'inputPathOrcid', + 'outputPath' : 'outputPath', + 'workingPathOrcid':'workingPathOrcid', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app', + 'workingPath' : '/data/doiboost/process_p', + 'sparkExecutorCores' : '2', + 'sparkExecutorIntersectionMemory' : '12G', + 'sparkExecutorMemory' : '8G', + 'esServer' : '[es_server]', + 'esIndex' : 'crossref' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210714_075237_381 + 2021-07-14T09:51:46+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml new file mode 100644 index 000000000..6d29e25a1 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml @@ -0,0 +1,132 @@ + +
+ + + + + +
+ + Import H2020classification + Import H2020classification + 30 + + + sets the URL to download the project file + + projectFileURL + https://cordis.europa.eu/data/cordis-h2020projects.csv + + + + + + + sets the URL to download the programme file + + programmeFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv + + + + + + + sets the URL to download the topics file + + topicFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx + + + + + + + sets the name of the sheet in the topic file to be read + + sheetName + Topics + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'h2020classification', + 'jobProperty' : 'export_action_set_h2020classification', + 'enablingProperty' : 'active_h2020classification', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the H2020 Classification + + executeOozieJob + IIS + + { + 'outputPath': 'outputPath', + 'sheetName':'sheetName', + 'projectFileURL' : 'projectFileURL', + 'programmeFileURL' : 'programmeFileURL', + 'topicFileURL':'topicFileURL' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/h2020classification', + 'postgresURL':'', + 'postgresUser':'', + 'postgresPassword':'' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210524_084803_740 + 2021-05-24T09:05:50+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml new file mode 100644 index 000000000..c5642dadc --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml @@ -0,0 +1,101 @@ + +
+ + + + + +
+ + Import Orcid + Import InfoSpace + 30 + + + set the hdfs input path + + inputPath + /data/orcid_activities_2020 + + + + + + + set the temporary path where to store the action set + + processOutputPath + /tmp/prod_provision/working_path_orcid_activities + + + + + + + prepare action sets + + + [ + { + 'set' : 'orcidworks-no-doi', + 'jobProperty' : 'export_action_set_orcidworks_no_doi', + 'enablingProperty' : 'active_orcidworks_no_doi', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the Orcid No Doi + + executeOozieJob + IIS + + { + 'workingPath' : 'inputPath', + 'processOutputPath' : 'processOutputPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app', + 'spark2GenNoDoiDatasetMaxExecutors' : '200', + 'spark2GenNoDoiDatasetExecutorMemory' : '2G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210713_170819_470 + 2021-07-13T17:28:26+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml new file mode 100644 index 000000000..4810fda3b --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml @@ -0,0 +1,89 @@ + +
+ + + + + +
+ + Update ROR actionset + Import Infospace + 30 + + + Set the base path containing the no_doi_dataset folder + + inputPath + /data/ror/ror-data-2021-04-06.json + + + + + + + prepare action sets + + + [ + { + 'set' : 'ror', + 'jobProperty' : 'export_action_set_ror', + 'enablingProperty' : 'active_ror', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + update the ROR actionset + + executeOozieJob + IIS + + { + 'rorJsonInputPath' : 'inputPath', + 'rorActionSetPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app', + 'workingDir': '/tmp/import_ror_actionset_prod' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210518_143542_478 + 2021-05-18T14:37:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml new file mode 100644 index 000000000..ef2205e32 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -0,0 +1,628 @@ + +
+ + + + + +
+ + Graph construction for IIS [BETA] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_inference/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_inference/graph/02_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/beta_inference/graph/03_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_inference/graph/04_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_inference/graph/05_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_inference/graph/06_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_inference/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + false + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_inference/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210730_094240_462 + 2021-07-30T15:04:19+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml new file mode 100644 index 000000000..e5ce3d710 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -0,0 +1,437 @@ + +
+ + + + + +
+ + Graph construction for IIS [PROD NEW] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_inference/graph/00_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_inference/graph/01_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/prod_inference/graph/02_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_inference/graph/03_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_inference/graph/04_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_inference/graph/05_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + should apply the relations id patching based on the provided idMapping? + + shouldPatchRelations + false + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210719_165159_86 + 2021-07-19T20:45:09+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml new file mode 100644 index 000000000..126d5f58d --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml @@ -0,0 +1,225 @@ + +
+ + + + + +
+ + IIS main workflow V3 [PROD] + IIS + 30 + + + start + + + + + + + Set a regex of funder shortnames to exclude from the project reference processing + + referenceextraction_project_fundingclass_blacklist_regex + ^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$ + + + + + + + prepare action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-06-23 + IIS + /tmp/prod_inference/graph/05_graph_cleaned + import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + 'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex', + + 'active_document_affiliation' : 'active_document_affiliation', + 'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'import_infospace_graph_location', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + false + build-report + + + + + + + update action sets + + + + + + + + wf_20210719_221139_780 + 2021-07-21T01:23:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml new file mode 100644 index 000000000..766783f8b --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -0,0 +1,995 @@ + +
+ + + + + +
+ + Graph Construction [BETA] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_provision/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_provision/graph/02_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/beta_provision/graph/03_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_provision/graph/04_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/beta_provision/graph/05_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_provision/graph/06_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/beta_provision/graph/07_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/beta_provision/graph/08_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/beta_provision/graph/09_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/beta_provision/graph/10_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/beta_provision/graph/11_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/beta_provision/graph/12_graph_comunity_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/beta_provision/graph/13_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_provision/graph/14_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_provision/graph/15_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + true + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_provision/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_provision/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup' + } + + build-report + + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/orcid', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/funding', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/beta_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210803_134357_367 + 2021-08-03T17:08:11+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml new file mode 100644 index 000000000..be6155f2f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -0,0 +1,778 @@ + +
+ + + + + +
+ + Graph construction [PROD NEW] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_provision/graph/00_prod_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_provision/graph/01_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/prod_provision/graph/02_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_provision/graph/03_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/prod_provision/graph/04_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_provision/graph/05_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/prod_provision/graph/06_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/prod_provision/graph/07_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/prod_provision/graph/08_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/prod_provision/graph/09_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/prod_provision/graph/10_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/prod_provision/graph/11_graph_comunity_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/prod_provision/graph/12_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_provision/graph/13_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup' + } + + build-report + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/orcid', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/funding', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/prod_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210723_171026_279 + 2021-07-24T00:00:39+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml new file mode 100644 index 000000000..836e69d6f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml @@ -0,0 +1,74 @@ + +
+ + + + + +
+ + Graph to HiveDB [PROD] + Data Provision + 30 + + + Set the path containing the AGGREGATOR graph + + inputPath + + + + + + + + Set the target path to store the RAW graph + + hiveDbName + + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'inputPath' : 'inputPath', + 'hiveDbName' : 'hiveDbName' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hive/oozie_app', + 'sparkDriverMemory' : '4G', + 'sparkExecutorMemory' : '10G', + 'sparkExecutorCores' : '3' + } + + build-report + + + + + + + + wf_20210728_075001_400 + 2021-07-28T08:04:00+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml new file mode 100644 index 000000000..6cdf41bb6 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml @@ -0,0 +1,99 @@ + +
+ + + + + +
+ + Update Solr [PROD] + Data Provision + 30 + + + Set the path containing the GRAPH to index + + inputGraphRootPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the target path to store the RAW graph + + format + DMF + + + + + + + Set the lookup address + + isLookupUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'inputGraphRootPath' : 'inputGraphRootPath', + 'isLookupUrl' : 'isLookupUrl', + 'format' : 'format' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app', + 'sourceMaxRelations' : '1000', + 'targetMaxRelations' : '10000000', + 'relPartitions' : '3000', + 'batchSize' : '2000', + 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy', + 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', + 'resumeFrom' : 'prepare_relations', + 'shouldIndex' : 'true', + 'outputFormat' : 'SOLR', + 'sparkDriverMemoryForJoining' : '3G', + 'sparkExecutorMemoryForJoining' : '7G', + 'sparkExecutorCoresForJoining' : '4', + 'sparkDriverMemoryForIndexing' : '2G', + 'sparkExecutorMemoryForIndexing' : '2G', + 'sparkExecutorCoresForIndexing' : '64', + 'sparkNetworkTimeout' : '600', + 'workingDir' : '/tmp/prod_provision/working_dir/update_solr' + } + + build-report + + + + + + + + wf_20210724_062705_620 + 2021-07-25T13:25:37+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml new file mode 100644 index 000000000..4dfae3c7d --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Update Stats [PROD] + Data Provision + 30 + + + Set the OpenAIRE graph DB name + + openaire_db_name + openaire_prod_yyyyMMdd + + + + + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + update the content in the stats DB + + executeOozieJob + IIS + + { + 'openaire_db_name' : 'openaire_db_name', + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app', + 'hive_timeout' : '15000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_shadow_name' : 'openaire_prod_stats_shadow', + 'external_stats_db_name' : 'stats_ext', + 'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow', + 'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow', + 'context_api_url' : 'https://services.openaire.eu/openaire' + } + + build-report + + + + + + + + wf_20210725_065608_71 + 2021-07-26T07:35:55+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml new file mode 100644 index 000000000..d8def071f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml @@ -0,0 +1,87 @@ + +
+ + + + + +
+ + Publish Stats [PROD] + Content Publishing + 35 + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + publishes the stats DB to the public schema + + executeOozieJob + IIS + + { + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app', + 'hive_timeout' : '150000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_production_name' : 'openaire_prod_stats', + 'monitor_db_production_name' : 'openaire_prod_stats_monitor', + 'observatory_db_production_name' : 'openaire_prod_stats_observatory' + } + + build-report + + + + + + + + wf_20210727_160728_625 + 2021-07-27T16:53:01+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml new file mode 100644 index 000000000..cf337fd7e --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml @@ -0,0 +1,131 @@ + +
+ + + + + +
+ + Update Broker events [PROD OCEAN] + Data Provision + 30 + + + Set the path containing the GRAPH to scan + + graphInputPath + + + + + + + + Set the datasource Ids Whitelist + + datasourceIdWhitelist + openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68 + + + + + + + Set the datasource type Whitelist + + datasourceTypeWhitelist + pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository + + + + + + + Set the datasource Id Blacklist + + datasourceIdBlacklist + - + + + + + + + Set the TOPIC whitelist (* = all topics) + + topicWhitelist + ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID + + + + + + + Set the output path to store the Event records + + outputDir + /var/lib/dnet/broker_PROD/events + + + + + + + wait configurations + + + + + + + update the BROKER events + + executeOozieJob + IIS + + { + 'graphInputPath' : 'graphInputPath', + 'datasourceIdWhitelist' : 'datasourceIdWhitelist', + 'datasourceTypeWhitelist' : 'datasourceTypeWhitelist', + 'datasourceIdBlacklist' : 'datasourceIdBlacklist', + 'topicWhitelist' : 'topicWhitelist', + 'outputDir' : 'outputDir' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app', + 'esEventIndexName' : '', + 'esNotificationsIndexName' : '', + 'esIndexHost' : '', + 'maxIndexedEventsForDsAndTopic' : '100', + 'esBatchWriteRetryCount' : '8', + 'esBatchWriteRetryWait' : '60s', + 'esBatchSizeEntries' : '200', + 'esNodesWanOnly' : 'true', + 'brokerApiBaseUrl' : '', + 'brokerDbUrl' : '', + 'brokerDbUser' : '', + 'brokerDbPassword' : '', + 'sparkDriverMemory' : '3G', + 'sparkExecutorMemory' : '7G', + 'sparkExecutorCores' : '6', + 'workingDir' : '/tmp/prod_provision/working_dir/broker_events' + } + + build-report + + + + + + + + wf_20210709_073839_206 + 2021-07-09T11:01:01+00:00 + FAILURE + + + +
\ No newline at end of file From 2efa5abda5e4cbebcc4061c0f3866fe431d3163e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 12:28:36 +0200 Subject: [PATCH 152/287] refactoring --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 30e500da3..71b20b356 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -123,8 +123,6 @@
- - From bc9e3a06ba9a436d2bbe00d56accd78f55508abd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 15:46:06 +0200 Subject: [PATCH 153/287] Graph Dump - extended the test class --- .../dhp/oa/graph/dump/DumpJobTest.java | 609 +++++++++++------- 1 file changed, 374 insertions(+), 235 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index d7c247499..3a4f05fb5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -145,227 +145,7 @@ public class DumpJobTest { } @Test - public void testDataset() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(90, verificationDataset.count()); - -// verificationDataset -// .filter("id = '50|DansKnawCris::1a960e20087cb46b93588e4e184e8a58'") -// .foreach((ForeachFunction) rec -> System.out.println(OBJECT_MAPPER.writeValueAsString(rec))); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset - .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") - .count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset - .filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'") - .count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset - .filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'") - .count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset - .filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'") - .count()); - - Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90); - - Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); - - - - } - - @Test - public void testDataset2All() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult.class)); - - Assertions.assertEquals(5, verificationDataset.count()); - - verificationDataset - .foreach((ForeachFunction) res -> System.out.println(OBJECT_MAPPER.writeValueAsString(res))); - } - - @Test - public void testDataset2Communities() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(0, verificationDataset.count()); - - verificationDataset.show(false); - } - - @Test - public void testPublication() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(74, verificationDataset.count()); - verificationDataset.show(false); - - Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count()); - -//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) - - } - - @Test - public void testPublicationExtendedInstance2Community() throws JsonProcessingException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run(false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count()); - - //the common fields in the result have been checked with the test below. Now checking only - // community specific fields - - CommunityResult cr = verificationDataset.first(); - - Assertions.assertEquals(1, cr.getContext().size()); - Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode()); - Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel()); - Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size()); - Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance()); - Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust()); - - Assertions.assertEquals(1, cr.getCollectedfrom().size()); - Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey()); - Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue()); - - Assertions.assertEquals(1, cr.getInstance().size()); - Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getInstance().get(0).getCollectedfrom().getKey()); - Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue()); - Assertions.assertEquals("10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey()); - Assertions.assertEquals("One Ecosystem",cr.getInstance().get(0).getHostedby().getValue()); - - - } - - @Test - public void testPublicationExtendedInstance(){ + public void testPublicationDump(){ final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") .getPath(); @@ -519,7 +299,7 @@ public class DumpJobTest { Assertions.assertEquals(0, instance.getPid().size()); Assertions.assertEquals(1, instance.getAlternateIdentifier().size()); Assertions.assertTrue(instance.getAlternateIdentifier().get(0).getScheme().equals("doi") - && instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718")); + && instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718")); Assertions.assertEquals(null, instance.getLicense()); Assertions.assertTrue(instance.getAccessright().getCode().equals(Constants.accessRightsCoarMap .get(ModelConstants.ACCESS_RIGHT_OPEN))); @@ -532,8 +312,379 @@ public class DumpJobTest { Assertions.assertEquals("2017-01-01",instance.getPublicationdate()); Assertions.assertEquals(null,instance.getArticleprocessingcharge()); Assertions.assertEquals("peerReviewed", instance.getRefereed()); + } + @Test + public void testDatasetDump(){ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run(false, sourcePath, workingDir.toString() + "/result", + communityMapPath, Dataset.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + + Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count()); + + //the common fields in the result have been already checked. Now checking only + // community specific fields + + GraphResult gr = verificationDataset.first(); + + Assertions.assertEquals(2, gr.getGeolocation().size()); + Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count()); + Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count()); + Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count()); + Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada")).count()); + Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count()); + Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals("")).count()); + Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals("")).count()); + + Assertions.assertEquals("1024Gb", gr.getSize()); + + Assertions.assertEquals("1.01", gr.getVersion()); + + Assertions.assertEquals(null, gr.getContainer()); + Assertions.assertEquals(null, gr.getCodeRepositoryUrl()); + Assertions.assertEquals(null, gr.getProgrammingLanguage()); + Assertions.assertEquals(null, gr.getDocumentationUrl()); + Assertions.assertEquals(null, gr.getContactperson()); + Assertions.assertEquals(null, gr.getContactgroup()); + Assertions.assertEquals(null, gr.getTool()); + + } + + @Test + public void testSoftwareDump(){ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run(false, sourcePath, workingDir.toString() + "/result", + communityMapPath, Software.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + + Assertions.assertEquals(1, verificationDataset.filter("type = 'software'").count()); + + GraphResult gr = verificationDataset.first(); + + Assertions.assertEquals(2, gr.getDocumentationUrl().size()); + Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_1")); + Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_2")); + + Assertions.assertEquals("code_repo", gr.getCodeRepositoryUrl()); + + Assertions.assertEquals("perl", gr.getProgrammingLanguage()); + + + Assertions.assertEquals(null, gr.getContainer()); + Assertions.assertEquals(null, gr.getContactperson()); + Assertions.assertEquals(null, gr.getContactgroup()); + Assertions.assertEquals(null, gr.getTool()); + Assertions.assertEquals(null, gr.getGeolocation()); + Assertions.assertEquals(null, gr.getSize()); + Assertions.assertEquals(null, gr.getVersion()); + + } + + @Test + public void testOrpDump(){ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run(false, sourcePath, workingDir.toString() + "/result", + communityMapPath, OtherResearchProduct.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + + Assertions.assertEquals(1, verificationDataset.filter("type = 'other'").count()); + + GraphResult gr = verificationDataset.first(); + + Assertions.assertEquals(2, gr.getContactperson().size()); + Assertions.assertTrue(gr.getContactperson().contains(("contact_person1"))); + Assertions.assertTrue(gr.getContactperson().contains(("contact_person2"))); + + Assertions.assertEquals(1, gr.getContactgroup().size()); + Assertions.assertTrue(gr.getContactgroup().contains(("contact_group"))); + + Assertions.assertEquals(2, gr.getTool().size()); + Assertions.assertTrue(gr.getTool().contains("tool1")); + Assertions.assertTrue(gr.getTool().contains("tool2")); + + + Assertions.assertEquals(null, gr.getContainer()); + Assertions.assertEquals(null, gr.getDocumentationUrl()); + Assertions.assertEquals(null, gr.getCodeRepositoryUrl()); + Assertions.assertEquals(null, gr.getProgrammingLanguage()); + Assertions.assertEquals(null, gr.getGeolocation()); + Assertions.assertEquals(null, gr.getSize()); + Assertions.assertEquals(null, gr.getVersion()); + + } + + @Test + public void testPublicationDumpCommunity() throws JsonProcessingException { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run(false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + + Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count()); + + //the common fields in the result have been already checked. Now checking only + // community specific fields + + CommunityResult cr = verificationDataset.first(); + + Assertions.assertEquals(1, cr.getContext().size()); + Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode()); + Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel()); + Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size()); + Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance()); + Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust()); + + Assertions.assertEquals(1, cr.getCollectedfrom().size()); + Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey()); + Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue()); + + Assertions.assertEquals(1, cr.getInstance().size()); + Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getInstance().get(0).getCollectedfrom().getKey()); + Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue()); + Assertions.assertEquals("10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey()); + Assertions.assertEquals("One Ecosystem",cr.getInstance().get(0).getHostedby().getValue()); + + + } + + @Test + public void testDataset() { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run( + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + Assertions.assertEquals(90, verificationDataset.count()); + + Assertions + .assertTrue( + verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset + .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") + .count()); + + Assertions + .assertTrue( + verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset + .filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'") + .count()); + + Assertions + .assertTrue( + verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset + .filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'") + .count()); + + Assertions + .assertTrue( + verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset + .filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'") + .count()); + + Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90); + + Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); + + + + } + + @Test + public void testDataset2All() { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run( + // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult.class)); + + Assertions.assertEquals(5, verificationDataset.count()); + + } + + @Test + public void testDataset2Communities() { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run( + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + Assertions.assertEquals(0, verificationDataset.count()); + + + } + + @Test + public void testPublication() { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + DumpProducts dump = new DumpProducts(); + dump + .run( + // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + Assertions.assertEquals(74, verificationDataset.count()); + verificationDataset.show(false); + + Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count()); + } @Test @@ -566,9 +717,7 @@ public class DumpJobTest { Assertions.assertEquals(6, verificationDataset.count()); Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count()); - verificationDataset.show(false); -//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) } @@ -602,9 +751,6 @@ public class DumpJobTest { Assertions.assertEquals(3, verificationDataset.count()); Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count()); - verificationDataset.show(false); - -//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) } @@ -621,7 +767,6 @@ public class DumpJobTest { DumpProducts dump = new DumpProducts(); dump .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); @@ -668,7 +813,7 @@ public class DumpJobTest { .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(23, verificationDataset.count()); - // verificationDataset.show(false); + Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count()); @@ -688,12 +833,6 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); - -// verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset -// .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") -// .count() - -//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) } } From 8daaa32e90f1ddc88a24c2603723230ae57cec9a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 15:46:29 +0200 Subject: [PATCH 154/287] Graph Dump - added resources for testing --- .../dhp/oa/graph/dump/resultDump/dataset_extendedinstance | 1 + .../eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance | 1 + .../dhp/oa/graph/dump/resultDump/software_extendedinstance | 1 + 3 files changed, 3 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance new file mode 100644 index 000000000..14ca9c10c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance @@ -0,0 +1 @@ +{"author":[{"affiliation":[],"fullname":"Santana Quintero, Mario","name":"Mario","pid":[],"rank":1,"surname":"Santana Quintero"},{"affiliation":[],"fullname":"Giansante, Christian","name":"Christian","pid":[],"rank":2,"surname":"Giansante"},{"affiliation":[],"fullname":"Sharp, Rachel","name":"Rachel","pid":[],"rank":3,"surname":"Sharp"},{"affiliation":[],"fullname":"Bernard, Tristan","name":"Tristan","pid":[],"rank":4,"surname":"Bernard"},{"affiliation":[],"fullname":"Waheed, Khadija","name":"Khadija","pid":[],"rank":5,"surname":"Waheed"},{"affiliation":[],"fullname":"Merchant, Arkoun","name":"Arkoun","pid":[],"rank":6,"surname":"Merchant"},{"affiliation":[],"fullname":"Lapko, Spencer","name":"Spencer","pid":[],"rank":7,"surname":"Lapko"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::e783372970a1dc066ce99c673090ff88","value":"Federated Research Data Repository / Dépôt fédéré de données de recherche"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Federated Research Data Repository"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Dépôt fédéré de données de recherche"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2020-07-05"},"dateofcollection":"2021-07-11T02:32:34Z","dateoftransformation":"2021-07-11T11:14:27.539Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Although 18b York St does not have an official heritage designation, it has important social ties with the Ottawa Francophone community, significant contextual values, and played a role in the Byward Market's economic history. 18 York St was built on October 24th, 1877, only 23 years after Bytown's official declaration as a city. It's architect, J. Bowes, designed approximately 30 other buildings in the Ottawa area during his career. The four-storey, masonry clad building was originally designed to house L'Institut Canadien Français d'Ottawa. This organization played a key role in the advancement to officially make Ottawa a bilingual city. Nearly a century later, 18 York St was acquired by the NCC after it had been gutted by a fire in 1970, and was rehabilitated into a Franco-Ontarian theatre. This is the Integrated Project Dossier compiled by a group of undergraduate students of the Architectural Conservation and Sustainability Program (Engineers and architects) at Carleton University for the CIVE3207 (ARCN4100) Historic Site Recording and Assessment course in 20176."}],"size":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1024Gb"},"version":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1.01"},"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"geolocation":[{"box":"","place":"","point":"45.427242 -75.693904"},{"box":"","place":"18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada","point":""}],"id":"50|475c1990cbb2::44cb0c6b5bca7e84f5eab8bbdbd2c477","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5683/sp2/8xnbta"}],"collectedfrom":{"key":"10|openaire____::e783372970a1dc066ce99c673090ff88","value":"Federated Research Data Repository / Dépôt fédéré de données de recherche"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2020-07-05"},"hostedby":{"key":"10|openaire____::e783372970a1dc066ce99c673090ff88","value":"Federated Research Data Repository / Dépôt fédéré de données de recherche"},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.5683/SP2/8XNBTA"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627061571989,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fwww.frdr-dfdr.ca%2Fviringo%2Foai%2F","datestamp":"2020-07-05T00:00:00Z","harvestDate":"2021-07-11T02:32:34Z","identifier":"oai:dataverse.scholarsportal.info-dataverse-carleton:113237_106202","metadataNamespace":""}},"originalId":["50|475c1990cbb2::44cb0c6b5bca7e84f5eab8bbdbd2c477","oai:dataverse.scholarsportal.info-dataverse-carleton:113237_106202"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Carleton University Dataverse"},"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2020-07-05"}],"resourcetype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"storagedate":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2020-07-05"},"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Other"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Arts and Humanities"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Engineering"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Historic Building"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Heritage building"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"National Capital Commission"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Ottawa"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"18b York Street - Sidedoor Restaurant - Integrated Project Dossier (2017)"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance new file mode 100644 index 000000000..a15f628bc --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance @@ -0,0 +1 @@ +{"author":[{"affiliation":[],"fullname":"Schennink, B.H.C.","name":"B.H.C.","pid":[],"rank":1,"surname":"Schennink"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","value":"Metis Radboud University"}],"contactgroup":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"contact_group"}],"contactperson":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"contact_person1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"contact_person2"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2001-01-01"},"dateofcollection":"2021-04-08T23:45:38.202Z","dateoftransformation":"2021-04-09T00:59:19.769Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Dialogue Seminar between the European Union & the Churches - Brussels - conference contribution"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|MetisRadboud::a07b78b5c24eb013a4d88075e2235aeb","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","value":"Metis Radboud University"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2001-01-01"},"hostedby":{"key":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","value":"Metis Radboud University"},"instancetype":{"classid":"0020","classname":"Other ORP type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":[""]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627061349796,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Foamemtfp.uci.ru.nl%2Fmetis-oaipmh-endpoint%2FOAIHandler","datestamp":"2009-09-07","harvestDate":"2021-04-08T23:45:38.202Z","identifier":"oai:metis.ru.nl:Products/162661","metadataNamespace":""}},"originalId":["50|MetisRadboud::a07b78b5c24eb013a4d88075e2235aeb","oai:metis.ru.nl:Products/162661"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2001-01-01"}],"resourcetype":{"classid":"0020","classname":"0020","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"other","classname":"other","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Churches as source of information and early-warning about potential conflicts"}],"tool":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"tool1"}, {"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"tool2"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance new file mode 100644 index 000000000..a70a157d2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance @@ -0,0 +1 @@ +{"author":[{"affiliation":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"[u'Korean Institute for Advanced Study (KIAS)']"}],"fullname":"Kim, Hyun-Jung","name":"Hyun-Jung","pid":[],"rank":1,"surname":"Kim"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-08-23"},"dateofcollection":"2018-10-28T00:39:04.337Z","dateoftransformation":"","description":[{"value":"TBFIT Tight-binding FITting package (TBFIT*) Now you can fit your tight-binding parameters via Slatet-Koster method with a very little effort. TBFIT is a scientific Fortran program for numerical tight-binding parameter fitting mainly based on Slater-Koster scheme and tight-binding calculations for the electronic band structures of given atomic and electronic configurations with a simple input interfaces. Basically TBFIT fits Slater-Koster parameters including scaling factors to your target first-principles band structure. For the fitting algorithm, Levenberg–Marquardt algorithm is used (implemented by modifying MINPACK library). You can taste how TBFIT works and its performance in Example section. Once you get proper tight-binding parameters, you can also calculate various Berry phase related quantities, such as Berry curvature, Zak phase, Wilson's loop (Z2 index, Wannier charge center), first Chern number, and so on. In addition, density of states, eigenstate charge density or wavefunction plot, STM simulations (integerated eigenstate density within certain energy window), band structures for edge/surface geometries, E-field effects, etc. You do not need to specify all the bond connections, instead, just provide hopping classes (whether it is 1st-, 2nd-, or 3rd-nearest-neighbor hopping) between atomic species and corresponding hopping parameter names in your input file. Then TBFIT automatically set up tight-binding hamiltonian based on your input geometry and tight-binding parameter setup as defined in input interfaces. In the future release, I will add some routines for the Green function approach to get the surface state (or edge spectrum as well) and the routines for the spin/mirror Chern number evaluation within the given tight-binding parameter and model hamiltonian. For the details and examples you can find documents that describes the input tags in MANUAL folder and several input/output files in EXAMPLE folder, respectively. NOTE: In the current version, very limited routines are MPI parallelized (basically k-point parallelism is applied for STM, eigenstate plot, band structure calculations, parameter fitting, density of states, and berry curvature, etc.). Unfortunately the Wilson's loop calculation routines are not parallelized yet. If you publish the results of TBFIT then please site this github version : Hyun-Jung Kim, Tight-binding fitting package (TBFIT) (Version v0.2.2) * a temporary name TBFIT Tight-binding Topological invariantSlater-Koster parameter Levenberg–Marquardt algorithmparameter fitting"},{"value":"* This is a beta version. The official version will be released soon."}],"documentationUrl":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doc_url_1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doc_url_2"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|doi_________::0085d346d3078342a1279fa6c8a30327","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-08-23"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1402480"}],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://zenodo.org/record/1402480","http://dx.doi.org/10.5281/zenodo.1402480"]},{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"value":"2018-08-23"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"instancetype":{"classid":"0029","classname":"Software","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1402480"}],"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://dx.doi.org/10.5281/zenodo.1402480"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627060583158,"license":[],"measures":[],"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"mongodb%3A%2F%2Fbeta.services.openaire.eu%3A27017","datestamp":"","harvestDate":"2020-11-18T11:24:22.086Z","identifier":"10.5281/zenodo.1402480","metadataNamespace":""}},"originalId":["10.5281/zenodo.1402480","50|datacite____::0085d346d3078342a1279fa6c8a30327"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1402480"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.5281/zenodo.1402480"}],"programmingLanguage":{"classid":"perl","classname":"progLangclassname","schemeid":"pl_schemeid","schemename":"pl_schemename"},"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Zenodo"},"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2018-08-23"},{"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2018-08-23"}],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"software","classname":"software","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"TBFIT"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Tight-binding"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Topological invariant"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Slater-Koster"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lavenberg-Marquardt"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Parameter fitting"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"TBFIT"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Tight-binding"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Topological invariant"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Slater-Koster"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lavenberg-Marquardt"},{"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Parameter fitting"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tbfit: Tight-Binding Fitting Package"}], "codeRepositoryUrl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"code_repo"}} \ No newline at end of file From bd0d7bfba70bff61b1ca96474d7b39059eaff796 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 16:36:17 +0200 Subject: [PATCH 155/287] Graph Dump - added resources for testing addition of validation date in project for communityresult --- .../dhp/oa/graph/dump/addProjectInfo/preparedInfoValidated | 2 ++ .../dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/preparedInfoValidated create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/preparedInfoValidated b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/preparedInfoValidated new file mode 100644 index 000000000..588bec804 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/preparedInfoValidated @@ -0,0 +1,2 @@ +{"resultId":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","projectsList":[{"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","code":"123455","acronym":null,"title":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"validated":{"validationDate":"2021-08-06","validatedByFunder":true}},{"id":"40|aka_________::03376222b28a3aebf2730ac514818d04","code":"119027","acronym":null,"title":"EGFR Tyrosine Kinase Inhibitors and LKB1 Tumor Suppressor in Non-Small-Cell Lung Cancer","funder":{"shortName":"EC","name":"European Commission","jurisdiction":"EU","fundingStream":"H2020"},"provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"validated":null}]} +{"resultId":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80","projectsList":[{"id":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","code":"123455","acronym":null,"title":"Business services for rural bioenergy entrepreneurship in Finland: a network analysis approach","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"sysimport:crosswalk:entityregistry","trust":"0.900000000000000022"},"validated":{"validationDate":"2021-08-04","validatedByFunder":true}}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel new file mode 100644 index 000000000..6b146405a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -0,0 +1,2 @@ +{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":null},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file From a90bac3bc92d44f1628dd309ee5e7404b4b06e09 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 16:36:54 +0200 Subject: [PATCH 156/287] Graph Dump - added method to test class to verify addition of validation date in projects for community result --- .../oa/graph/dump/UpdateProjectInfoTest.java | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee0..fd34433c6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -5,11 +5,17 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import java.util.logging.Filter; +import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; +import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -135,4 +141,85 @@ public class UpdateProjectInfoTest { resultExplodedProvenance.show(false); } + @Test + public void testValidatedRelation() throws Exception{ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") + .getPath(); + + SparkUpdateProjectInfo.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-preparedInfoPath", sourcePath + "/preparedInfoValidated", + "-outputPath", workingDir.toString() + "/result", + "-sourcePath", sourcePath + "/publication_extendedmodel" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + verificationDataset.show(false); + + Assertions.assertEquals(2, verificationDataset.count()); + verificationDataset.createOrReplaceTempView("dataset"); + + String query = "select id, MyT.code code, MyT.title title, MyT.funder.name funderName, MyT.funder.shortName funderShortName, " + + + "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream, MyT.validated " + + "from dataset " + + "lateral view explode(projects) p as MyT "; + + org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); + + Assertions.assertEquals(2, resultExplodedProvenance.count()); + resultExplodedProvenance.show(false); + + Assertions + .assertEquals( + 2, + resultExplodedProvenance.filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2'").count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '123455'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '119027'") + .count()); + + Project project = verificationDataset + .map((MapFunction) cr -> cr.getProjects().stream().filter(p -> p.getValidated() != null).collect(Collectors.toList()).get(0) + , Encoders.bean(Project.class)).first(); + + Assertions.assertTrue(project.getFunder().getName().equals("Academy of Finland")); + Assertions.assertTrue(project.getFunder().getShortName().equals("AKA")); + Assertions.assertTrue(project.getFunder().getJurisdiction().equals("FI")); + Assertions.assertTrue(project.getFunder().getFundingStream() == null); + Assertions.assertTrue(project.getValidated().getValidationDate().equals("2021-08-06")); + + + project = verificationDataset + .map((MapFunction) cr -> cr.getProjects().stream().filter(p -> p.getValidated() == null).collect(Collectors.toList()).get(0) + , Encoders.bean(Project.class)).first(); + + + Assertions.assertTrue(project.getFunder().getName().equals("European Commission")); + Assertions.assertTrue(project.getFunder().getShortName().equals("EC")); + Assertions.assertTrue(project.getFunder().getJurisdiction().equals("EU")); + Assertions.assertTrue(project.getFunder().getFundingStream().equals("H2020")); + + + } + } From 9731a6144a4ed9df6a513b81cc5c390b9328971b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 10 Aug 2021 17:49:45 +0200 Subject: [PATCH 157/287] hostedbymap - in case the journal is open access the access may be changed also for the best access right in the result --- .../dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 312513285..4c3b98f3b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -34,6 +34,7 @@ object SparkApplyHostedByMapToResult { if (ei.getOpenaccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } } From b688567db5238276a7826b7f3e3ccd9962f44f53 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 10:12:10 +0200 Subject: [PATCH 158/287] hostedbymap - modified part of test to check the bestaccessright changed --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 8 ++++++-- .../eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index c48c3b35d..a48e52702 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -55,13 +55,17 @@ class TestApply extends java.io.Serializable{ assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) + assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getBestaccessright.getClassname.equals("not available")) }else{ assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json index 602bedbda..8f6842bf3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -7,7 +7,7 @@ {"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} {"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} {"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . < https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} -{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} {"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} {"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} {"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file From 2ee21da43b8a93b8db9f75528e8c4e268e1b6187 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 12:13:22 +0200 Subject: [PATCH 159/287] suggestions from SonarLint --- .../GenerateOoziePropertiesMojo.java | 12 +- .../WritePredefinedProjectProperties.java | 11 +- .../GenerateOoziePropertiesMojoTest.java | 16 +- .../WritePredefinedProjectPropertiesTest.java | 32 ++-- .../dhp/application/ApplicationUtils.java | 14 -- .../ArgumentApplicationParser.java | 8 +- .../dhp/application/OptionsParameter.java | 3 - .../dnetlib/dhp/collection/ApiDescriptor.java | 2 +- .../java/eu/dnetlib/dhp/common/Constants.java | 3 + .../dnetlib/dhp/common/GraphResultMapper.java | 17 +- .../eu/dnetlib/dhp/common/MakeTarArchive.java | 23 +-- .../eu/dnetlib/dhp/common/MdstoreClient.java | 9 +- .../eu/dnetlib/dhp/common/PacePerson.java | 1 - .../dhp/common/api/ZenodoAPIClient.java | 27 +-- .../dhp/common/api/zenodo/Creator.java | 6 +- .../dnetlib/dhp/common/api/zenodo/File.java | 14 -- .../dhp/common/rest/DNetRestClient.java | 37 ++-- .../dhp/common/vocabulary/Vocabulary.java | 3 +- .../common/vocabulary/VocabularyGroup.java | 13 +- .../java/eu/dnetlib/dhp/message/Message.java | 5 +- .../eu/dnetlib/dhp/oa/merge/AuthorMerger.java | 17 +- .../dhp/parser/utility/VtdUtilityParser.java | 3 + .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/schema/oaf/utils/OafMapperUtils.java | 17 +- .../java/eu/dnetlib/dhp/utils/DHPUtils.java | 32 +--- .../dhp/utils/ISLookupClientFactory.java | 15 +- .../saxon/AbstractExtensionFunction.java | 2 +- .../dnetlib/dhp/utils/saxon/ExtractYear.java | 5 +- .../dhp/utils/saxon/NormalizeDate.java | 2 +- .../eu/dnetlib/dhp/utils/saxon/PickFirst.java | 5 +- .../utils/saxon/SaxonTransformerFactory.java | 3 + .../ArgumentApplicationParserTest.java | 4 +- .../dnetlib/dhp/common/HdfsSupportTest.java | 10 +- .../eu/dnetlib/dhp/common/PacePersonTest.java | 6 +- .../dhp/common/SparkSessionSupportTest.java | 6 +- .../dhp/common/api/ZenodoAPIClientTest.java | 10 +- .../dhp/oa/merge/AuthorMergerTest.java | 4 +- .../schema/oaf/utils/OafMapperUtilsTest.java | 36 ++-- .../relation/RelationMapperTest.java | 4 +- .../dnetlib/dhp/actionmanager/ISClient.java | 29 ++- .../actionmanager/promote/MergeAndGet.java | 1 + .../PromoteActionPayloadForGraphTableJob.java | 4 +- ...rtitionActionSetsByPayloadTypeJobTest.java | 2 +- .../promote/MergeAndGetTest.java | 32 ++-- ...moteActionPayloadForGraphTableJobTest.java | 4 +- .../PromoteActionPayloadFunctionsTest.java | 8 +- .../bipfinder/CollectAndSave.java | 12 +- .../bipfinder/SparkAtomicActionScoreJob.java | 6 +- .../project/PrepareProgramme.java | 19 +- .../project/PrepareProjects.java | 2 +- .../project/ReadProjectsFromDB.java | 21 +-- .../project/SparkAtomicActionJob.java | 21 +-- .../project/utils/CSVParser.java | 6 +- .../project/utils/EXCELParser.java | 82 ++++----- .../actionmanager/project/utils/ReadCSV.java | 21 ++- .../project/utils/ReadExcel.java | 30 ++-- .../ror/GenerateRorActionSetJob.java | 7 +- .../dhp/actionmanager/ror/model/Address.java | 4 +- .../dhp/actionmanager/ror/model/Country.java | 4 +- .../ror/model/ExternalIdType.java | 2 +- .../ror/model/ExternalIdTypeDeserializer.java | 3 +- .../ror/model/GeonamesAdmin.java | 2 +- .../actionmanager/ror/model/GeonamesCity.java | 2 +- .../dhp/actionmanager/ror/model/Label.java | 2 +- .../dhp/actionmanager/ror/model/License.java | 2 +- .../actionmanager/ror/model/NameAndCode.java | 4 +- .../actionmanager/ror/model/Relationship.java | 4 +- .../ror/model/RorOrganization.java | 4 +- .../aggregation/common/AggregatorReport.java | 6 +- .../dhp/aggregation/common/ReportingJob.java | 2 +- .../mdstore/MDStoreActionNode.java | 6 +- .../dhp/collection/CollectorWorker.java | 2 +- .../GenerateNativeStoreSparkJob.java | 1 + .../dhp/collection/HttpConnector2.java | 18 +- .../mongodb/MDStoreCollectorPlugin.java | 2 - .../mongodb/MongoDbDumpCollectorPlugin.java | 2 +- .../collection/plugin/oai/OaiIterator.java | 22 +-- .../collection/plugin/rest/RestIterator.java | 26 +-- .../transformation/TransformSparkJobNode.java | 10 +- .../transformation/TransformationFactory.java | 17 +- .../dhp/transformation/xslt/DateCleaner.java | 5 - .../transformation/xslt/PersonCleaner.java | 26 --- .../xslt/XSLTTransformationFunction.java | 10 +- .../transformation/xslt/utils/Capitalize.java | 2 - .../SparkAtomicActionScoreJobTest.java | 30 ++-- .../actionmanager/project/CSVParserTest.java | 4 +- .../project/EXCELParserTest.java | 2 +- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../project/PrepareProjectTest.java | 2 +- .../project/SparkUpdateProjectTest.java | 2 +- .../ror/GenerateRorActionSetJobTest.java | 10 +- .../GenerateNativeStoreSparkJobTest.java | 12 +- .../plugin/rest/RestCollectorPluginTest.java | 4 +- .../CollectorWorkerApplicationTests.java | 6 +- .../transformation/TransformationJobTest.java | 16 +- .../dhp/blacklist/ReadBlacklistFromDB.java | 23 +-- .../SparkRemoveBlacklistedRelationJob.java | 6 +- .../dnetlib/dhp/blacklist/BlackListTest.java | 6 +- .../dhp/broker/model/EventFactory.java | 19 +- .../dhp/broker/oa/CheckDuplictedIdsJob.java | 11 +- .../dhp/broker/oa/IndexEventSubsetJob.java | 6 +- .../dhp/broker/oa/IndexNotificationsJob.java | 29 ++- .../dnetlib/dhp/broker/oa/IndexOnESJob.java | 3 +- .../broker/oa/PartitionEventsByDsIdJob.java | 13 +- .../oa/PrepareRelatedDatasourcesJob.java | 2 +- .../dhp/broker/oa/matchers/UpdateMatcher.java | 2 +- .../AbstractEnrichMissingDataset.java | 4 +- .../relatedProjects/EnrichMissingProject.java | 2 +- .../relatedProjects/EnrichMoreProject.java | 4 +- .../AbstractEnrichMissingPublication.java | 4 +- .../EnrichMissingSoftware.java | 2 +- .../relatedSoftware/EnrichMoreSoftware.java | 2 +- .../simple/EnrichMissingAuthorOrcid.java | 2 +- .../oa/matchers/simple/EnrichMissingPid.java | 2 +- .../matchers/simple/EnrichMissingSubject.java | 4 +- .../matchers/simple/EnrichMoreOpenAccess.java | 2 +- .../oa/matchers/simple/EnrichMorePid.java | 4 +- .../oa/matchers/simple/EnrichMoreSubject.java | 4 +- .../dhp/broker/oa/util/BrokerConstants.java | 3 + .../dhp/broker/oa/util/ClusterUtils.java | 3 + .../dhp/broker/oa/util/ConversionUtils.java | 26 +-- .../util/DatasourceRelationsAccumulator.java | 8 +- .../dhp/broker/oa/util/EventFinder.java | 5 +- .../dhp/broker/oa/util/SubscriptionUtils.java | 3 + .../dhp/broker/oa/util/TrustUtils.java | 2 + .../dhp/broker/oa/util/UpdateInfo.java | 4 +- .../broker/oa/matchers/UpdateMatcherTest.java | 5 +- .../EnrichMissingPublicationDateTest.java | 3 +- .../dhp/broker/oa/util/TrustUtilsTest.java | 26 +-- .../dhp/oa/dedup/AbstractSparkAction.java | 20 ++- .../eu/dnetlib/dhp/oa/dedup/DatePicker.java | 5 +- .../dhp/oa/dedup/DedupRecordFactory.java | 12 +- .../eu/dnetlib/dhp/oa/dedup/DedupUtility.java | 10 +- .../oa/dedup/DispatchEntitiesSparkJob.java | 9 +- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 6 +- .../eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 4 +- .../dnetlib/dhp/oa/dedup/SparkBlockStats.java | 4 +- .../oa/dedup/SparkCopyOpenorgsSimRels.java | 5 +- .../dedup/SparkCopyRelationsNoOpenorgs.java | 21 +-- .../dhp/oa/dedup/SparkCreateDedupRecord.java | 3 +- .../dhp/oa/dedup/SparkCreateMergeRels.java | 14 +- .../oa/dedup/SparkCreateOrgsDedupRecord.java | 3 - .../dhp/oa/dedup/SparkCreateSimRels.java | 9 +- .../dhp/oa/dedup/SparkPrepareNewOrgs.java | 13 +- .../dhp/oa/dedup/SparkPrepareOrgRels.java | 24 +-- .../dhp/oa/dedup/SparkPropagateRelation.java | 6 +- .../dhp/oa/dedup/SparkUpdateEntity.java | 125 +++++++------ .../oa/dedup/graph/ConnectedComponent.java | 14 -- .../dhp/oa/dedup/model/Identifier.java | 8 +- .../dnetlib/dhp/oa/dedup/DatePickerTest.java | 8 +- .../dhp/oa/dedup/EntityMergerTest.java | 14 +- .../dnetlib/dhp/oa/dedup/IdGeneratorTest.java | 6 +- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 15 +- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 20 +-- .../oa/dedup/SparkOpenorgsProvisionTest.java | 16 +- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 2 +- .../dhp/oa/dedup/jpath/JsonPathTest.java | 11 +- .../doiboost/crossref/CrossrefImporter.java | 9 +- .../dnetlib/doiboost/crossref/ESClient.java | 7 +- .../orcid/ActivitiesDecompressor.java | 25 +-- .../orcid/ExtractXMLActivitiesData.java | 7 +- .../orcid/ExtractXMLSummariesData.java | 10 +- .../orcid/OrcidAuthorsDOIsDataGen.java | 3 +- .../doiboost/orcid/OrcidDSManager.java | 3 +- .../orcid/SparkDownloadOrcidAuthors.java | 60 ++++--- .../orcid/SparkDownloadOrcidWorks.java | 46 ++--- .../orcid/SparkGenLastModifiedSeq.java | 24 ++- .../orcid/SparkGenerateDoiAuthorList.java | 20 +-- .../orcid/SparkUpdateOrcidAuthors.java | 35 ++-- .../orcid/SparkUpdateOrcidDatasets.java | 167 ++---------------- .../doiboost/orcid/SparkUpdateOrcidWorks.java | 27 ++- .../doiboost/orcid/SummariesDecompressor.java | 24 +-- .../doiboost/orcid/json/JsonHelper.java | 3 + .../dnetlib/doiboost/orcid/util/HDFSUtil.java | 37 ++-- .../doiboost/orcid/xml/XMLRecordParser.java | 54 +----- .../orcidnodoi/ActivitiesDumpReader.java | 18 +- .../orcidnodoi/GenOrcidAuthorWork.java | 6 +- .../SparkGenEnrichedOrcidWorks.java | 53 +++--- .../doiboost/orcidnodoi/json/JsonWriter.java | 3 + .../orcidnodoi/oaf/PublicationToOaf.java | 47 ++--- .../orcidnodoi/similarity/AuthorMatcher.java | 44 ++--- .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 37 ++-- .../doiboost/orcid/OrcidClientTest.java | 25 +-- .../orcid/xml/XMLRecordParserTest.java | 42 +---- .../orcidnodoi/PublicationToOafTest.java | 23 +-- .../orcidnodoi/xml/OrcidNoDoiTest.java | 141 ++++++--------- .../eu/dnetlib/dhp/PropagationConstant.java | 11 +- .../dhp/bulktag/community/Community.java | 5 - .../community/CommunityConfiguration.java | 14 +- .../CommunityConfigurationFactory.java | 10 +- .../dhp/bulktag/community/Constraint.java | 3 - .../dhp/bulktag/community/Constraints.java | 14 +- .../dhp/bulktag/community/Provider.java | 8 - .../community/QueryInformationSystem.java | 3 +- .../dhp/bulktag/community/ResultTagger.java | 98 +++++----- .../community/SelectionConstraints.java | 3 - .../bulktag/community/TaggingConstants.java | 3 + .../bulktag/community/ZenodoCommunity.java | 1 - .../dhp/bulktag/criteria/Selection.java | 4 +- .../dhp/bulktag/criteria/VerbResolver.java | 8 +- .../bulktag/criteria/VerbResolverFactory.java | 3 + .../PrepareDatasourceCountryAssociation.java | 25 ++- .../PrepareResultCountrySet.java | 9 +- .../SparkCountryPropagationJob.java | 12 +- .../PrepareResultOrcidAssociationStep1.java | 3 - .../PrepareResultOrcidAssociationStep2.java | 2 +- .../SparkOrcidToResultFromSemRelJob.java | 36 ++-- .../PrepareProjectResultsAssociation.java | 7 +- .../SparkResultToProjectThroughSemRelJob.java | 44 ++--- .../PrepareResultCommunitySet.java | 11 +- ...kResultToCommunityFromOrganizationJob.java | 16 +- .../PrepareResultCommunitySetStep2.java | 3 +- ...parkResultToCommunityThroughSemRelJob.java | 10 +- .../PrepareResultInstRepoAssociation.java | 13 +- ...arkResultToOrganizationFromIstRepoJob.java | 58 +++--- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 16 +- .../CommunityConfigurationFactoryTest.java | 9 +- .../CountryPropagationJobTest.java | 27 ++- .../OrcidPropagationJobTest.java | 6 +- .../ProjectPropagationJobTest.java | 6 +- .../ResultToCommunityJobTest.java | 2 +- .../ResultToCommunityJobTest.java | 2 +- .../ResultToOrganizationJobTest.java | 6 +- .../dhp/oa/graph/clean/CleaningRuleMap.java | 2 +- .../dhp/oa/graph/clean/OafCleaner.java | 2 +- .../eu/dnetlib/dhp/oa/graph/dump/MakeTar.java | 13 +- .../oa/graph/dump/QueryInformationSystem.java | 9 +- .../dhp/oa/graph/dump/ResultMapper.java | 18 +- .../dhp/oa/graph/dump/SaveCommunityMap.java | 16 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 11 +- .../eu/dnetlib/dhp/oa/graph/dump/Utils.java | 5 +- .../graph/dump/community/CommunitySplit.java | 21 +-- .../community/SparkDumpCommunityProducts.java | 5 +- .../community/SparkPrepareResultProject.java | 27 ++- .../community/SparkUpdateProjectInfo.java | 6 +- .../dhp/oa/graph/dump/complete/Constants.java | 1 - .../dump/complete/CreateContextEntities.java | 21 ++- .../dump/complete/CreateContextRelation.java | 24 ++- .../dhp/oa/graph/dump/complete/Process.java | 4 +- .../dump/complete/QueryInformationSystem.java | 8 +- .../complete/SparkOrganizationRelation.java | 5 +- .../funderresults/SparkDumpFunderResults.java | 9 +- .../SparkResultLinkedToProject.java | 8 +- .../DatasourceCompatibilityComparator.java | 20 --- .../graph/merge/MergeGraphTableSparkJob.java | 14 +- .../raw/AbstractMdRecordToOafMapper.java | 24 +-- .../raw/GenerateEntitiesApplication.java | 10 +- .../oa/graph/raw/MergeClaimsApplication.java | 8 +- .../raw/MigrateDbEntitiesApplication.java | 25 +-- .../raw/MigrateHdfsMdstoresApplication.java | 14 +- .../raw/MigrateMongoMdstoresApplication.java | 4 +- .../dhp/oa/graph/raw/OafToOafMapper.java | 5 +- .../dhp/oa/graph/raw/OdfToOafMapper.java | 9 +- .../graph/raw/PatchRelationsApplication.java | 2 - .../common/AbstractMigrationApplication.java | 16 +- .../oa/graph/GraphHiveImporterJobTest.java | 2 +- .../clean/GraphCleaningFunctionsTest.java | 6 +- .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +- .../dhp/oa/graph/dump/MakeTarTest.java | 2 +- .../dump/PrepareResultProjectJobTest.java | 131 +++++++------- .../dump/QueryInformationSystemTest.java | 9 +- .../oa/graph/dump/SplitForCommunityTest.java | 2 +- .../oa/graph/dump/UpdateProjectInfoTest.java | 2 +- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 6 +- .../graph/dump/complete/CreateEntityTest.java | 4 +- .../dump/complete/CreateRelationTest.java | 4 +- .../ExtractRelationFromEntityTest.java | 2 +- .../dump/complete/FunderParsingTest.java | 7 +- .../complete/QueryInformationSystemTest.java | 10 +- .../RelationFromOrganizationTest.java | 2 +- .../ResultLinkedToProjectTest.java | 4 +- .../dump/funderresult/SplitPerFunderTest.java | 2 +- .../merge/MergeGraphTableSparkJobTest.java | 4 +- .../raw/GenerateEntitiesApplicationTest.java | 4 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 108 ++++++----- .../raw/MigrateDbEntitiesApplicationTest.java | 22 ++- .../MigrateMongoMdstoresApplicationTest.java | 2 - .../raw/PatchRelationApplicationTest.java | 2 +- .../oa/graph/reflections/ReflectionTest.java | 2 +- .../CreateRelatedEntitiesJob_phase1.java | 18 +- .../CreateRelatedEntitiesJob_phase2.java | 8 +- .../dhp/oa/provision/PrepareRelationsJob.java | 12 +- .../dhp/oa/provision/ProvisionConstants.java | 3 + .../oa/provision/SolrAdminApplication.java | 1 - .../dhp/oa/provision/XmlConverterJob.java | 7 +- .../dhp/oa/provision/XmlIndexingJob.java | 15 +- .../dhp/oa/provision/model/JoinedEntity.java | 1 - .../model/ProvisionModelSupport.java | 3 + .../dhp/oa/provision/model/RelatedEntity.java | 1 - .../utils/AuthorPidTypeComparator.java | 1 - .../dhp/oa/provision/utils/ContextMapper.java | 9 +- .../oa/provision/utils/GraphMappingUtils.java | 6 +- .../oa/provision/utils/ISLookupClient.java | 9 +- .../oa/provision/utils/LicenseComparator.java | 69 -------- .../utils/StreamingInputDocumentFactory.java | 79 +++++---- .../oa/provision/utils/TemplateFactory.java | 4 +- .../oa/provision/utils/XmlRecordFactory.java | 73 +++----- .../utils/XmlSerializationUtils.java | 29 ++- .../dhp/oa/provision/utils/ZkServers.java | 2 +- .../sx/provision/DropAndCreateESIndex.java | 1 + .../provision/SparkIndexCollectionOnES.java | 1 + .../provision/IndexRecordTransformerTest.java | 23 ++- .../oa/provision/PrepareRelationsJobTest.java | 19 +- .../provision/SolrAdminApplicationTest.java | 17 +- .../oa/provision/SortableRelationKeyTest.java | 1 - .../dhp/oa/provision/XmlIndexingJobTest.java | 4 +- .../oa/provision/XmlRecordFactoryTest.java | 20 +-- .../graph/usagerawdata/export/ConnectDB.java | 20 +-- .../usagestatsbuild/export/ConnectDB.java | 18 +- 309 files changed, 1874 insertions(+), 2497 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java delete mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java index 10a25fdc3..a642dab70 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -8,8 +8,6 @@ import java.util.List; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; /** * Generates oozie properties which were not provided from commandline. @@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { }; @Override - public void execute() throws MojoExecutionException, MojoFailureException { + public void execute() { if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { String generatedSandboxName = generateSandboxName( @@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { /** * Generates sandbox name from workflow source directory. * - * @param wfSourceDir + * @param wfSourceDir workflow source directory * @return generated sandbox name */ private String generateSandboxName(String wfSourceDir) { // utilize all dir names until finding one of the limiters - List sandboxNameParts = new ArrayList(); + List sandboxNameParts = new ArrayList<>(); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); ArrayUtils.reverse(tokens); if (tokens.length > 0) { for (String token : tokens) { for (String limiter : limiters) { if (limiter.equals(token)) { - return sandboxNameParts.size() > 0 + return !sandboxNameParts.isEmpty() ? StringUtils.join(sandboxNameParts.toArray()) : null; } } - if (sandboxNameParts.size() > 0) { + if (!sandboxNameParts.isEmpty()) { sandboxNameParts.add(0, File.separator); } sandboxNameParts.add(0, token); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java index d195ca86e..e3cdf5a22 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -16,6 +16,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected List getEscapeChars(String escapeChars) { List tokens = getListFromCSV(escapeChars); - List realTokens = new ArrayList(); + List realTokens = new ArrayList<>(); for (String token : tokens) { String realToken = getRealToken(token); realTokens.add(realToken); @@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { * @return content */ protected String getContent(String comment, Properties properties, List escapeTokens) { - List names = new ArrayList(properties.stringPropertyNames()); + List names = new ArrayList<>(properties.stringPropertyNames()); Collections.sort(names); StringBuilder sb = new StringBuilder(); if (!StringUtils.isBlank(comment)) { @@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { throws MojoExecutionException { try { String content = getContent(comment, properties, escapeTokens); - FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8); } catch (IOException e) { throw new MojoExecutionException("Error creating properties file", e); } @@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected static final List getListFromCSV(String csv) { if (StringUtils.isBlank(csv)) { - return new ArrayList(); + return new ArrayList<>(); } - List list = new ArrayList(); + List list = new ArrayList<>(); String[] tokens = StringUtils.split(csv, ","); for (String token : tokens) { list.add(token.trim()); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java index 4bfcd3b33..2ff6bea30 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** @author mhorst, claudio.atzori */ -public class GenerateOoziePropertiesMojoTest { +class GenerateOoziePropertiesMojoTest { private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); @BeforeEach - public void clearSystemProperties() { + void clearSystemProperties() { System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); } @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteSandboxNameAlreadySet() throws Exception { + void testExecuteSandboxNameAlreadySet() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String sandboxName = "originalSandboxName"; @@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteEmptyWorkflowSourceDir() throws Exception { + void testExecuteEmptyWorkflowSourceDir() throws Exception { // given String workflowSourceDir = ""; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteNullSandboxNameGenerated() throws Exception { + void testExecuteNullSandboxNameGenerated() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecute() throws Exception { + void testExecute() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteWithoutRoot() throws Exception { + void testExecuteWithoutRoot() throws Exception { // given String workflowSourceDir = "wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index 0b3ea9653..84b962b4b 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension; /** @author mhorst, claudio.atzori */ @ExtendWith(MockitoExtension.class) -public class WritePredefinedProjectPropertiesTest { +class WritePredefinedProjectPropertiesTest { @Mock private MavenProject mavenProject; @@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest { // ----------------------------------- TESTS --------------------------------------------- @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectProperties() throws Exception { + void testExecuteWithProjectProperties() throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test() - public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { + void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { + void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromBlankLocation() { + void testExecuteIncludingPropertyKeysFromBlankLocation() { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { + void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { // given mojo.setQuiet(true); mojo.setIncludePropertyKeysFromFiles(new String[] { @@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidFile() { + void testExecuteIncludingPropertyKeysFromInvalidFile() { // given mojo.setIncludePropertyKeysFromFiles(new String[] { "invalid location" @@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { // given mojo.setIncludeEnvironmentVariables(true); @@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey"; String value = "systemPropertyValue"; @@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) + void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey "; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java deleted file mode 100644 index c53b83561..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.application; - -import java.io.*; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; - -import com.google.common.collect.Maps; - -public class ApplicationUtils { - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java index 0429bc25d..72c1f6a5e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java @@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable { final StringWriter stringWriter = new StringWriter(); IOUtils.copy(gis, stringWriter); return stringWriter.toString(); - } catch (Throwable e) { - log.error("Wrong value to decompress:" + abstractCompressed); - throw new RuntimeException(e); + } catch (IOException e) { + log.error("Wrong value to decompress: {}", abstractCompressed); + throw new IllegalArgumentException(e); } } - public static String compressArgument(final String value) throws Exception { + public static String compressArgument(final String value) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(value.getBytes()); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java index 7004112e4..f34326d67 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java @@ -9,9 +9,6 @@ public class OptionsParameter { private boolean paramRequired; private boolean compressed; - public OptionsParameter() { - } - public String getParamName() { return paramName; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java index 12937a197..fbbbffcbb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java @@ -34,7 +34,7 @@ public class ApiDescriptor { return params; } - public void setParams(final HashMap params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 108edad47..8fab94e92 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -12,6 +12,9 @@ public class Constants { public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + private Constants() { + } + static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 44599eb83..8ceee5c8a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable { if (oInst.isPresent()) { out .setInstance( - oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); } @@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setMaintitle(iTitle.get(0).getValue()); } @@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setSubtitle(iTitle.get(0).getValue()); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 7dc0e4417..d0909642c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); } private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + try (TarArchiveOutputStream ar = new TarArchiveOutputStream( + fileSystem.create(hdfsWritePath).getWrappedStream())) { - TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); - RemoteIterator fileStatusListIterator = fileSystem - .listFiles( - new Path(inputPath), true); + while (iterator.hasNext()) { + writeCurrentFile(fileSystem, dir_name, iterator, ar, 0); + } - while (fileStatusListIterator.hasNext()) { - writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0); } - - ar.close(); } public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java index 0bc782ccb..d06544ae1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java @@ -10,8 +10,6 @@ import java.util.Optional; import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.QueryBuilder; +import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; @@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable { final String currentId = Optional .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) - .map(r -> r.first()) + .map(FindIterable::first) .map(d -> d.getString("currentId")) .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); @@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable { if (!Iterables.contains(client.listDatabaseNames(), dbName)) { final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); log.warn(err); - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } return client.getDatabase(dbName); } @@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable { String.format("Missing collection '%s' in database '%s'", collName, db.getName())); log.warn(err); if (abortIfMissing) { - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } else { return null; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e02ca614..91c6c1825 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -24,7 +24,6 @@ import com.google.common.hash.Hashing; */ public class PacePerson { - private static final String UTF8 = "UTF-8"; private List name = Lists.newArrayList(); private List surname = Lists.newArrayList(); private List fullname = Lists.newArrayList(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 1f267733d..3f5c6ad4a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -5,6 +5,9 @@ import java.io.*; import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; + import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; @@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable { this.deposition_id = deposition_id; } - public ZenodoAPIClient(String urlString, String access_token) throws IOException { + public ZenodoAPIClient(String urlString, String access_token) { this.urlString = urlString; this.access_token = access_token; @@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(bucket + "/" + file_name) - .addHeader("Content-Type", "application/zip") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .build(); @@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(body) .build(); @@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id + "/actions/newversion") - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); @@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(url) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java index c03762693..c14af55b6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java @@ -32,13 +32,13 @@ public class Creator { public static Creator newInstance(String name, String affiliation, String orcid) { Creator c = new Creator(); - if (!(name == null)) { + if (name != null) { c.name = name; } - if (!(affiliation == null)) { + if (affiliation != null) { c.affiliation = affiliation; } - if (!(orcid == null)) { + if (orcid != null) { c.orcid = orcid; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java index c7428de7d..509f444b9 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java @@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo; import java.io.Serializable; -import net.minidev.json.annotate.JsonIgnore; - public class File implements Serializable { private String checksum; private String filename; private long filesize; private String id; - @JsonIgnore - // private Links links; - public String getChecksum() { return checksum; } @@ -46,13 +41,4 @@ public class File implements Serializable { this.id = id; } -// @JsonIgnore -// public Links getLinks() { -// return links; -// } -// -// @JsonIgnore -// public void setLinks(Links links) { -// this.links = links; -// } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java index 98dabf56a..af6926cc7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.common.rest; +import java.io.IOException; import java.util.Arrays; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpUriRequest; @@ -23,17 +23,20 @@ public class DNetRestClient { private static final ObjectMapper mapper = new ObjectMapper(); + private DNetRestClient() { + } + public static T doGET(final String url, Class clazz) throws Exception { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet, clazz); } - public static String doGET(final String url) throws Exception { + public static String doGET(final String url) throws IOException { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet); } - public static String doPOST(final String url, V objParam) throws Exception { + public static String doPOST(final String url, V objParam) throws IOException { final HttpPost httpPost = new HttpPost(url); if (objParam != null) { @@ -45,25 +48,25 @@ public class DNetRestClient { return doHTTPRequest(httpPost); } - public static T doPOST(final String url, V objParam, Class clazz) throws Exception { + public static T doPOST(final String url, V objParam, Class clazz) throws IOException { return mapper.readValue(doPOST(url, objParam), clazz); } - private static String doHTTPRequest(final HttpUriRequest r) throws Exception { - CloseableHttpClient client = HttpClients.createDefault(); + private static String doHTTPRequest(final HttpUriRequest r) throws IOException { + try (CloseableHttpClient client = HttpClients.createDefault()) { - log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); - log - .info( - "request headers: {}", - Arrays - .asList(r.getAllHeaders()) - .stream() - .map(h -> h.getName() + ":" + h.getValue()) - .collect(Collectors.joining(","))); + log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); + log + .info( + "request headers: {}", + Arrays + .asList(r.getAllHeaders()) + .stream() + .map(h -> h.getName() + ":" + h.getValue()) + .collect(Collectors.joining(","))); - CloseableHttpResponse response = client.execute(r); - return IOUtils.toString(response.getEntity().getContent()); + return IOUtils.toString(client.execute(r).getEntity().getContent()); + } } private static T doHTTPRequest(final HttpUriRequest r, Class clazz) throws Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index a9daede8f..b3eb98d4f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -46,7 +46,7 @@ public class Vocabulary implements Serializable { } public VocabularyTerm getTerm(final String id) { - return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); + return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null); } protected void addTerm(final String id, final String name) { @@ -81,7 +81,6 @@ public class Vocabulary implements Serializable { .ofNullable(getTermBySynonym(syn)) .map(term -> getTermAsQualifier(term.getId())) .orElse(null); - // .orElse(OafMapperUtils.unknown(getId(), getName())); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index a89bb486f..d5f57849c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable { } vocs.addTerm(vocId, termId, termName); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable { .getTerms() .values() .stream() - .map(t -> t.getId()) + .map(VocabularyTerm::getId) .collect(Collectors.toCollection(HashSet::new)); } @@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable { return Optional .ofNullable(vocId) .map(String::toLowerCase) - .map(id -> vocs.containsKey(id)) + .map(vocs::containsKey) .orElse(false); } private void addSynonyms(final String vocId, final String termId, final String syn) { String id = Optional .ofNullable(vocId) - .map(s -> s.toLowerCase()) + .map(String::toLowerCase) .orElseThrow( - () -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); + () -> new IllegalArgumentException( + String + .format( + "empty vocabulary id for [term:%s, synonym:%s]", termId, syn))); Optional .ofNullable(vocs.get(id)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java index f1107b4b8..c7a0b5f50 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.message; import java.io.Serializable; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -10,8 +9,8 @@ public class Message implements Serializable { private static final long serialVersionUID = 401753881204524893L; - public static String CURRENT_PARAM = "current"; - public static String TOTAL_PARAM = "total"; + public static final String CURRENT_PARAM = "current"; + public static final String TOTAL_PARAM = "total"; private MessageType messageType; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index 7a8e55a6e..aea046203 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; @@ -19,6 +18,9 @@ public class AuthorMerger { private static final Double THRESHOLD = 0.95; + private AuthorMerger() { + } + public static List merge(List> authors) { authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); @@ -36,7 +38,8 @@ public class AuthorMerger { public static List mergeAuthor(final List a, final List b, Double threshold) { int pa = countAuthorsPids(a); int pb = countAuthorsPids(b); - List base, enrich; + List base; + List enrich; int sa = authorsSize(a); int sb = authorsSize(b); @@ -62,7 +65,7 @@ public class AuthorMerger { // (if an Author has more than 1 pid, it appears 2 times in the list) final Map basePidAuthorMap = base .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -74,7 +77,7 @@ public class AuthorMerger { // (list of pid that are missing in the other list) final List> pidToEnrich = enrich .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -117,9 +120,9 @@ public class AuthorMerger { } public static String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null - ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" - : "") + final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() + : ""; + return (pid.getQualifier() != null ? classid : "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java index 9ac0a0bf7..fd4c0191a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java @@ -12,6 +12,9 @@ import com.ximpleware.VTDNav; /** Created by sandro on 9/29/16. */ public class VtdUtilityParser { + private VtdUtilityParser() { + } + public static List getTextValuesWithAttributes( final AutoPilot ap, final VTDNav vn, final String xpath, final List attributes) throws VtdException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 1d002ed7e..d8b1cded8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { r .getAuthor() .stream() - .filter(a -> Objects.nonNull(a)) + .filter(Objects::nonNull) .filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .collect(Collectors.toList())); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index c6a8fd5a7..720fe47fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*; public class OafMapperUtils { + private OafMapperUtils() { + } + public static Oaf merge(final Oaf left, final Oaf right) { if (ModelSupport.isSubClass(left, OafEntity.class)) { return mergeEntities((OafEntity) left, (OafEntity) right); } else if (ModelSupport.isSubClass(left, Relation.class)) { ((Relation) left).mergeFrom((Relation) right); } else { - throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName()); } return left; } @@ -38,7 +41,7 @@ public class OafMapperUtils { } else if (ModelSupport.isSubClass(left, Project.class)) { left.mergeFrom(right); } else { - throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); } return left; } @@ -62,7 +65,7 @@ public class OafMapperUtils { public static List listKeyValues(final String... s) { if (s.length % 2 > 0) { - throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)"); } final List list = new ArrayList<>(); @@ -88,7 +91,7 @@ public class OafMapperUtils { .stream(values) .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -97,7 +100,7 @@ public class OafMapperUtils { .stream() .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -342,10 +345,10 @@ public class OafMapperUtils { if (instanceList != null) { final Optional min = instanceList .stream() - .map(i -> i.getAccessright()) + .map(Instance::getAccessright) .min(new AccessRightComparator<>()); - final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); + final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new); if (StringUtils.isBlank(rights.getClassid())) { rights.setClassid(UNKNOWN); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 8d760a2cd..6a86f30df 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -34,6 +34,9 @@ public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); + private DHPUtils() { + } + public static Seq toSeq(List list) { return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); } @@ -44,7 +47,7 @@ public class DHPUtils { md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { - System.err.println("Error creating id"); + log.error("Error creating id from {}", s); return null; } } @@ -53,33 +56,6 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } - public static String compressString(final String input) { - try (ByteArrayOutputStream out = new ByteArrayOutputStream(); - Base64OutputStream b64os = new Base64OutputStream(out)) { - GZIPOutputStream gzip = new GZIPOutputStream(b64os); - gzip.write(input.getBytes(StandardCharsets.UTF_8)); - gzip.close(); - return out.toString(); - } catch (Throwable e) { - return null; - } - } - - public static String decompressString(final String input) { - byte[] byteArray = Base64.decodeBase64(input.getBytes()); - int len; - try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); - ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) { - byte[] buffer = new byte[1024]; - while ((len = gis.read(buffer)) != -1) { - bos.write(buffer, 0, len); - } - return bos.toString(); - } catch (Exception e) { - return null; - } - } - public static String getJPathString(final String jsonPath, final String json) { try { Object o = JsonPath.read(json, jsonPath); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java index b326c4159..8ae0bb5c3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java @@ -18,13 +18,16 @@ public class ISLookupClientFactory { private static final int requestTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10; + private ISLookupClientFactory() { + } + public static ISLookUpService getLookUpService(final String isLookupUrl) { return getServiceStub(ISLookUpService.class, isLookupUrl); } @SuppressWarnings("unchecked") private static T getServiceStub(final Class clazz, final String endpoint) { - log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); + log.info("creating {} stub from {}", clazz.getName(), endpoint); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setAddress(endpoint); @@ -38,12 +41,10 @@ public class ISLookupClientFactory { log .info( - String - .format( - "setting connectTimeout to %s, requestTimeout to %s for service %s", - connectTimeout, - requestTimeout, - clazz.getCanonicalName())); + "setting connectTimeout to {}, requestTimeout to {} for service {}", + connectTimeout, + requestTimeout, + clazz.getCanonicalName()); policy.setConnectionTimeout(connectTimeout); policy.setReceiveTimeout(requestTimeout); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java index 9b00b908c..81f1b5142 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java @@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException; public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { - public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; + public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public abstract String getName(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java index c7e311b02..1ea2b9f46 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java @@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(""); } final Item item = arguments[0].head(); @@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction { for (String format : dateFormats) { try { c.setTime(new SimpleDateFormat(format).parse(s)); - String year = String.valueOf(c.get(Calendar.YEAR)); - return year; + return String.valueOf(c.get(Calendar.YEAR)); } catch (ParseException e) { } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java index 1b5f3c40d..3e5def9b5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java @@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(BLANK); } String s = arguments[0].head().getStringValue(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java index 46ecafd0a..b46a415d8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.utils.saxon; +import static org.apache.commons.lang3.StringUtils.isNotBlank; + import org.apache.commons.lang3.StringUtils; import net.sf.saxon.expr.XPathContext; @@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction { final String s1 = getValue(arguments[0]); final String s2 = getValue(arguments[1]); - return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); + final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : ""; + return new StringValue(value); } private String getValue(final Sequence arg) throws XPathException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java index b85d866f1..61049d2e1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java @@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl; public class SaxonTransformerFactory { + private SaxonTransformerFactory() { + } + /** * Creates the index record transformer from the given XSLT * diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java index e14020830..1788239f2 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java @@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -public class ArgumentApplicationParserTest { +class ArgumentApplicationParserTest { @Test - public void testParseParameter() throws Exception { + void testParseParameter() throws Exception { final String jsonConfiguration = IOUtils .toString( this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java index 870943816..fa721d5e5 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java @@ -21,13 +21,13 @@ public class HdfsSupportTest { class Remove { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); } @Test - public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { + void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { // when HdfsSupport.remove(tempDir.toString(), new Configuration()); @@ -36,7 +36,7 @@ public class HdfsSupportTest { } @Test - public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { + void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { // given Path file = Files.createTempFile(tempDir, "p", "s"); @@ -52,13 +52,13 @@ public class HdfsSupportTest { class ListFiles { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); } @Test - public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { + void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java index 5ebd7213e..cb9ae2886 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; -public class PacePersonTest { +class PacePersonTest { @Test - public void pacePersonTest1() { + void pacePersonTest1() { PacePerson p = new PacePerson("Artini, Michele", false); assertEquals("Artini", p.getSurnameString()); @@ -17,7 +17,7 @@ public class PacePersonTest { } @Test - public void pacePersonTest2() { + void pacePersonTest2() { PacePerson p = new PacePerson("Michele G. Artini", false); assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Michele G", p.getNameString()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java index 2f01c0863..8fa966c2f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java @@ -18,7 +18,8 @@ public class SparkSessionSupportTest { class RunWithSparkSession { @Test - public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); @@ -37,7 +38,8 @@ public class SparkSessionSupportTest { } @Test - public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 9ae9c33c2..2ccaed3e4 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @Disabled -public class ZenodoAPIClientTest { +class ZenodoAPIClientTest { private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String ACCESS_TOKEN = ""; @@ -22,7 +22,7 @@ public class ZenodoAPIClientTest { private final String depositionId = "674915"; @Test - public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { + void testUploadOldDeposition() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); @@ -44,7 +44,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -67,7 +67,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionNewName() throws IOException, MissingConceptDoiException { + void testNewVersionNewName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -87,7 +87,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionOldName() throws IOException, MissingConceptDoiException { + void testNewVersionOldName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java index 9c4e62214..3a7a41a1b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class AuthorMergerTest { +class AuthorMergerTest { private String publicationsBasePath; @@ -43,7 +43,7 @@ public class AuthorMergerTest { } @Test - public void mergeTest() { // used in the dedup: threshold set to 0.95 + void mergeTest() { // used in the dedup: threshold set to 0.95 for (List authors1 : authors) { System.out.println("List " + (authors.indexOf(authors1) + 1)); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8d519a93f..4068f0abb 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; import me.xuender.unidecode.Unidecode; -public class OafMapperUtilsTest { +class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -42,7 +42,7 @@ public class OafMapperUtilsTest { } @Test - public void testDateValidation() { + void testDateValidation() { assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); @@ -147,44 +147,46 @@ public class OafMapperUtilsTest { } @Test - public void testDate() { - System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); + void testDate() { + final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998"); + assertNotNull(date); + System.out.println(date); } @Test - public void testMergePubs() throws IOException { + void testMergePubs() throws IOException { Publication p1 = read("publication_1.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class); Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d2 = read("dataset_2.json", Dataset.class); - assertEquals(p1.getCollectedfrom().size(), 1); - assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); - assertEquals(d2.getCollectedfrom().size(), 1); + assertEquals(1, p1.getCollectedfrom().size()); + assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey()); + assertEquals(1, d2.getCollectedfrom().size()); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p1, d2) .getResulttype() - .getClassid() - .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + .getClassid()); - assertEquals(p2.getCollectedfrom().size(), 1); + assertEquals(1, p2.getCollectedfrom().size()); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertEquals(d1.getCollectedfrom().size(), 1); + assertEquals(1, d1.getCollectedfrom().size()); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.DATASET_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p2, d1) .getResulttype() - .getClassid() - .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + .getClassid()); } protected HashSet cfId(List collectedfrom) { - return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } protected T read(String filename, Class clazz) throws IOException { diff --git a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java index d1d1ada71..5743b0831 100644 --- a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java @@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation; import org.junit.jupiter.api.Test; -public class RelationMapperTest { +class RelationMapperTest { @Test - public void testLoadRels() throws Exception { + void testLoadRels() throws Exception { RelationMapper relationMapper = RelationMapper.load(); relationMapper.keySet().forEach(System.out::println); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 5a80c0b53..088e618c7 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; -import java.util.*; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Triple; import org.dom4j.Document; import org.dom4j.DocumentException; -import org.dom4j.Element; import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.dnetlib.actionmanager.rmi.ActionManagerException; -import eu.dnetlib.actionmanager.set.ActionManagerSet; -import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes; -import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import scala.Tuple2; public class ISClient implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); + private static final Logger log = LoggerFactory.getLogger(ISClient.class); private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; - private final ISLookUpService isLookup; + private final transient ISLookUpService isLookup; public ISClient(String isLookupUrl) { isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -63,7 +60,7 @@ public class ISClient implements Serializable { .map( sets -> sets .stream() - .map(set -> parseSetInfo(set)) + .map(ISClient::parseSetInfo) .filter(t -> ids.contains(t.getLeft())) .map(t -> buildDirectory(basePath, t)) .collect(Collectors.toList())) @@ -73,15 +70,17 @@ public class ISClient implements Serializable { } } - private Triple parseSetInfo(String set) { + private static Triple parseSetInfo(String set) { try { - Document doc = new SAXReader().read(new StringReader(set)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + Document doc = reader.read(new StringReader(set)); return Triple .of( doc.valueOf("//SET/@id"), doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@latest")); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { throw new IllegalStateException(e); } } @@ -99,7 +98,7 @@ public class ISClient implements Serializable { final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" + propertyName + "']/@value/string()"; - log.debug("quering for service property: " + q); + log.debug("quering for service property: {}", q); try { final List value = isLookup.quickSearchProfile(q); return Iterables.getOnlyElement(value); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java index fbb072957..eccfa445c 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java @@ -62,6 +62,7 @@ public class MergeAndGet { x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); } + @SuppressWarnings("unchecked") private static G selectNewerAndGet(G x, A y) { if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7893fcf8b..c5f252c97 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); + @SuppressWarnings("unchecked") Class actionPayloadClazz = (Class) Class.forName(actionPayloadClassName); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); @@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob { return spark .read() .parquet(path) - .map((MapFunction) value -> extractPayload(value), Encoders.STRING()) + .map((MapFunction) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING()) .map( (MapFunction) value -> decodePayload(actionPayloadClazz, value), Encoders.bean(actionPayloadClazz)); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index f51c697f4..62eec13d5 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest { private ISClient isClient; @Test - public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { + void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { // given Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path outputDir = workingDir.resolve("output"); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java index b2248d77a..4c88e9de3 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java @@ -20,7 +20,7 @@ public class MergeAndGetTest { class MergeFromAndGetStrategy { @Test - public void shouldThrowForOafAndOaf() { + void shouldThrowForOafAndOaf() { // given Oaf a = mock(Oaf.class); Oaf b = mock(Oaf.class); @@ -33,7 +33,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndRelation() { + void shouldThrowForOafAndRelation() { // given Oaf a = mock(Oaf.class); Relation b = mock(Relation.class); @@ -46,7 +46,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndOafEntity() { + void shouldThrowForOafAndOafEntity() { // given Oaf a = mock(Oaf.class); OafEntity b = mock(OafEntity.class); @@ -59,7 +59,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOaf() { + void shouldThrowForRelationAndOaf() { // given Relation a = mock(Relation.class); Oaf b = mock(Oaf.class); @@ -72,7 +72,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -85,7 +85,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForRelationAndRelation() { + void shouldBehaveProperlyForRelationAndRelation() { // given Relation a = mock(Relation.class); Relation b = mock(Relation.class); @@ -101,7 +101,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOaf() { + void shouldThrowForOafEntityAndOaf() { // given OafEntity a = mock(OafEntity.class); Oaf b = mock(Oaf.class); @@ -114,7 +114,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -127,7 +127,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { + void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { // given class OafEntitySub1 extends OafEntity { } @@ -145,7 +145,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForOafEntityAndOafEntity() { + void shouldBehaveProperlyForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); OafEntity b = mock(OafEntity.class); @@ -165,7 +165,7 @@ public class MergeAndGetTest { class SelectNewerAndGetStrategy { @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -178,7 +178,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -191,7 +191,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndResult() { + void shouldThrowForOafEntityAndResult() { // given OafEntity a = mock(OafEntity.class); Result b = mock(Result.class); @@ -204,7 +204,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { + void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { // given // real types must be used because subclass-superclass resolution does not work for // mocks @@ -221,7 +221,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnLeftForOafEntityAndOafEntity() { + void shouldShouldReturnLeftForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(1L); @@ -238,7 +238,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnRightForOafEntityAndOafEntity() { + void shouldShouldReturnRightForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(2L); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index 79ab55e07..99ce961aa 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest { class Main { @Test - public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { + void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { // given Class rowClazz = Relation.class; Class actionPayloadClazz = OafEntity.class; @@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest { @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") - public void shouldPromoteActionPayloadForGraphTable( + void shouldPromoteActionPayloadForGraphTable( MergeAndGet.Strategy strategy, Class rowClazz, Class actionPayloadClazz) diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index 477e4b204..cbc1bfaba 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest { class JoinTableWithActionPayloadAndMerge { @Test - public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { + void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { // given class OafImpl extends Oaf { } @@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { + void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { // given String id0 = "id0"; String id1 = "id1"; @@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { + void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { // given String id0 = "id0"; String id1 = "id1"; @@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest { class GroupTableByIdAndMerge { @Test - public void shouldRunProperly() { + void shouldRunProperly() { // given String id1 = "id1"; String id2 = "id2"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java index 4b9fd33f4..a48b84a33 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result; public class CollectAndSave implements Serializable { private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); + Objects + .requireNonNull( + CollectAndSave.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index cea8c2891..f178451c1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable { private static void prepareResults(SparkSession spark, String inputPath, String outputPath, String bipScorePath, Class inputClazz) { - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD bipDeserializeJavaRDD = sc .textFile(bipScorePath) @@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - System.out.println(bipScores.count()); - Dataset results = readPath(spark, inputPath, inputClazz); results.createOrReplaceTempView("result"); @@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable { ret.setId(value._2().getId()); return ret; }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) value -> value.getId(), Encoders.STRING()) + .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = new Result(); ret.setDataInfo(getDataInfo()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 9e852eb77..40cf5ee53 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -171,26 +171,23 @@ public class PrepareProgramme { } private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { - if (!a.getLanguage().equals("en")) { - if (b.getLanguage().equalsIgnoreCase("en")) { - a.setTitle(b.getTitle()); - a.setLanguage(b.getLanguage()); - } + if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) { + a.setTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); } - if (StringUtils.isEmpty(a.getShortTitle())) { - if (!StringUtils.isEmpty(b.getShortTitle())) { - a.setShortTitle(b.getShortTitle()); - } + if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) { + a.setShortTitle(b.getShortTitle()); } return a; } + @SuppressWarnings("unchecked") private static List prepareClassification(JavaRDD h2020Programmes) { Object[] codedescription = h2020Programmes .map( value -> new Tuple2<>(value.getCode(), - new Tuple2(value.getTitle(), value.getShortTitle()))) + new Tuple2<>(value.getTitle(), value.getShortTitle()))) .collect() .toArray(); @@ -216,7 +213,7 @@ public class PrepareProgramme { String[] tmp = ent.split("\\."); if (tmp.length <= 2) { if (StringUtils.isEmpty(entry._2()._2())) { - map.put(entry._1(), new Tuple2(entry._2()._1(), entry._2()._1())); + map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1())); } else { map.put(entry._1(), entry._2()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index cecd537ba..b1a381415 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -29,7 +29,7 @@ import scala.Tuple2; */ public class PrepareProjects { - private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java index 2bba9fb60..2cc20cb15 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient; */ public class ReadProjectsFromDB implements Closeable { - private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + + private static final String query = "SELECT code " + + "from projects where id like 'corda__h2020%' "; + + private final DbClient dbClient; private final Configuration conf; private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT code " + - "from projects where id like 'corda__h2020%' "; - public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { - final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + final Consumer consumer = rs -> producer.apply(rs).forEach(this::writeProject); dbClient.processResults(sql, consumer); } @@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable { public ReadProjectsFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index fdc12c662..a4a0bf6a4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Programme; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -47,13 +48,10 @@ import scala.Tuple2; * * To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more * than one programme. - * - * */ public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { @@ -137,7 +135,6 @@ public class SparkAtomicActionJob { h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setH2020Programme(pm); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); - // setProgramme(h2020classification, ocsvProgramme.get().getClassification()); pp.setH2020classification(Arrays.asList(h2020classification)); return pp; @@ -152,20 +149,16 @@ public class SparkAtomicActionJob { .map((MapFunction, Project>) p -> { Optional op = Optional.ofNullable(p._2()); Project rp = p._1(); - if (op.isPresent()) { - rp.setH2020topicdescription(op.get().getTitle()); - } + op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle())); return rp; }, Encoders.bean(Project.class)) .filter(Objects::nonNull) .groupByKey( - (MapFunction) p -> p.getId(), + (MapFunction) OafEntity::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (s, it) -> { Project first = it.next(); - it.forEachRemaining(p -> { - first.mergeFrom(p); - }); + it.forEachRemaining(first::mergeFrom); return first; }, Encoders.bean(Project.class)) .toJavaRDD() @@ -189,12 +182,6 @@ public class SparkAtomicActionJob { h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); } -// private static void setProgramme(H2020Classification h2020Classification, String classification) { -// String[] tmp = classification.split(" \\| "); -// -// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); -// } - public static Dataset readPath( SparkSession spark, String inputPath, Class clazz) { return spark diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 8bdce903b..e2d8c528e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -27,12 +27,14 @@ public class CSVParser { final Set headers = parser.getHeaderMap().keySet(); Class clazz = Class.forName(classForName); for (CSVRecord csvRecord : parser.getRecords()) { - final Object cc = clazz.newInstance(); + + @SuppressWarnings("unchecked") + final R cc = (R) clazz.newInstance(); for (String header : headers) { FieldUtils.writeField(cc, header, csvRecord.get(header), true); } - ret.add((R) cc); + ret.add(cc); } return ret; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5f5b61d8b..5ce730692 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -26,52 +26,52 @@ public class EXCELParser { throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { - OPCPackage pkg = OPCPackage.open(file); - XSSFWorkbook wb = new XSSFWorkbook(pkg); + try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) { - XSSFSheet sheet = wb.getSheet(sheetName); - - if (sheetName == null) { - throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); - } - - List ret = new ArrayList<>(); - - DataFormatter dataFormatter = new DataFormatter(); - Iterator rowIterator = sheet.rowIterator(); - List headers = new ArrayList<>(); - int count = 0; - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - if (count == 0) { - Iterator cellIterator = row.cellIterator(); - - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - headers.add(dataFormatter.formatCellValue(cell)); - } - } else { - Class clazz = Class.forName(classForName); - final Object cc = clazz.newInstance(); - - for (int i = 0; i < headers.size(); i++) { - Cell cell = row.getCell(i); - FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); - - } - - EXCELTopic et = (EXCELTopic) cc; - if (StringUtils.isNotBlank(et.getRcn())) { - ret.add((R) cc); - } + XSSFSheet sheet = wb.getSheet(sheetName); + if (sheetName == null) { + throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } - count += 1; - } + List ret = new ArrayList<>(); - return ret; + DataFormatter dataFormatter = new DataFormatter(); + Iterator rowIterator = sheet.rowIterator(); + List headers = new ArrayList<>(); + int count = 0; + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + if (count == 0) { + Iterator cellIterator = row.cellIterator(); + + while (cellIterator.hasNext()) { + Cell cell = cellIterator.next(); + headers.add(dataFormatter.formatCellValue(cell)); + } + } else { + Class clazz = Class.forName(classForName); + final Object cc = clazz.newInstance(); + + for (int i = 0; i < headers.size(); i++) { + Cell cell = row.getCell(i); + FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); + + } + + EXCELTopic et = (EXCELTopic) cc; + if (StringUtils.isNotBlank(et.getRcn())) { + ret.add((R) cc); + } + + } + + count += 1; + } + + return ret; + } } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d..b59bd9dbc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -25,7 +25,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; */ public class ReadCSV implements Closeable { private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; @@ -49,17 +49,16 @@ public class ReadCSV implements Closeable { log.info("Getting CSV file..."); readCSV.execute(classForName); - } } - public void execute(final String classForName) throws Exception { + public void execute(final String classForName) + throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { CSVParser csvParser = new CSVParser(); csvParser .parse(csvFile, classForName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -72,18 +71,18 @@ public class ReadCSV implements Closeable { final String hdfsNameNode, final String fileURL) throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + final FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 5ce0a681c..330779e24 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs */ public class ReadExcel implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private static final Log log = LogFactory.getLog(ReadExcel.class); + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final InputStream excelFile; @@ -51,13 +53,15 @@ public class ReadExcel implements Closeable { } } - public void execute(final String classForName, final String sheetName) throws Exception { + public void execute(final String classForName, final String sheetName) + throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException, + InstantiationException { + EXCELParser excelParser = new EXCELParser(); excelParser .parse(excelFile, classForName, sheetName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -68,20 +72,20 @@ public class ReadExcel implements Closeable { public ReadExcel( final String hdfsPath, final String hdfsNameNode, - final String fileURL) - throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + final String fileURL) throws CollectorException, IOException { + + final Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.excelFile = httpConnector.getInputSourceAsStream(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index d6c17e415..869e1cb68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; @@ -74,7 +75,7 @@ public class GenerateRorActionSetJob { final String jsonConfiguration = IOUtils .toString( - SparkAtomicActionJob.class + GenerateRorActionSetJob.class .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -108,7 +109,7 @@ public class GenerateRorActionSetJob { private static void processRorOrganizations(final SparkSession spark, final String inputPath, - final String outputPath) throws Exception { + final String outputPath) throws IOException { readInputPath(spark, inputPath) .map( @@ -203,7 +204,7 @@ public class GenerateRorActionSetJob { private static Dataset readInputPath( final SparkSession spark, - final String path) throws Exception { + final String path) throws IOException { try (final FileSystem fileSystem = FileSystem.get(new Configuration()); final InputStream is = fileSystem.open(new Path(path))) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java index b566a5501..a5acea5ae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Address implements Serializable { + private static final long serialVersionUID = 2444635485253443195L; + @JsonProperty("lat") private Float lat; @@ -37,8 +39,6 @@ public class Address implements Serializable { @JsonProperty("line") private String line; - private final static long serialVersionUID = 2444635485253443195L; - public Float getLat() { return lat; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java index 3dab60a9f..1e7621f98 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Country implements Serializable { + private static final long serialVersionUID = 4357848706229493627L; + @JsonProperty("country_code") private String countryCode; @JsonProperty("country_name") private String countryName; - private final static long serialVersionUID = 4357848706229493627L; - public String getCountryCode() { return countryCode; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java index 406bfd82c..5ea419b4e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java @@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable { private String preferred; - private final static long serialVersionUID = 2616688352998387611L; + private static final long serialVersionUID = 2616688352998387611L; public ExternalIdType() { } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java index 3fd0c9250..a744a325f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java @@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode; public class ExternalIdTypeDeserializer extends JsonDeserializer { @Override - public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) - throws IOException, JsonProcessingException { + public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException { final ObjectCodec oc = p.getCodec(); final JsonNode node = oc.readTree(p); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java index 9616db447..9317a777c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java @@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable { @JsonProperty("code") private String code; - private final static long serialVersionUID = 7294958526269195673L; + private static final long serialVersionUID = 7294958526269195673L; public String getAsciiName() { return asciiName; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java index 2b0487168..b13d64b10 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java @@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable { @JsonProperty("license") private License license; - private final static long serialVersionUID = -8389480201526252955L; + private static final long serialVersionUID = -8389480201526252955L; public NameAndCode getNutsLevel2() { return nutsLevel2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java index 61eb0339d..9a2cb39e3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java @@ -13,7 +13,7 @@ public class Label implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = -6576156103297850809L; + private static final long serialVersionUID = -6576156103297850809L; public String getIso639() { return iso639; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java index bdc8f4c42..a0f6cf774 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java @@ -13,7 +13,7 @@ public class License implements Serializable { @JsonProperty("license") private String license; - private final static long serialVersionUID = -194308261058176439L; + private static final long serialVersionUID = -194308261058176439L; public String getAttribution() { return attribution; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java index 61d7eb8e6..c0f5d7645 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class NameAndCode implements Serializable { + private static final long serialVersionUID = 5459836979206140843L; + @JsonProperty("name") private String name; @JsonProperty("code") private String code; - private final static long serialVersionUID = 5459836979206140843L; - public String getName() { return name; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java index 8b73db98f..db9f96445 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Relationship implements Serializable { + private static final long serialVersionUID = 7847399503395576960L; + @JsonProperty("type") private String type; @@ -16,8 +18,6 @@ public class Relationship implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = 7847399503395576960L; - public String getType() { return type; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java index 94de34fee..b8041cfdf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java @@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class RorOrganization implements Serializable { + private static final long serialVersionUID = -2658312087616043225L; + @JsonProperty("ip_addresses") private List ipAddresses = new ArrayList<>(); @@ -59,8 +61,6 @@ public class RorOrganization implements Serializable { @JsonProperty("status") private String status; - private final static long serialVersionUID = -2658312087616043225L; - public List getIpAddresses() { return ipAddresses; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java index c822a6723..8e46ab92b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java @@ -11,8 +11,6 @@ import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.utils.DHPUtils; @@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap implements C private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); - private MessageSender messageSender; + private transient MessageSender messageSender; public AggregatorReport() { } - public AggregatorReport(MessageSender messageSender) throws IOException { + public AggregatorReport(MessageSender messageSender) { this.messageSender = messageSender; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 9926f1688..549169673 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -22,7 +22,7 @@ public abstract class ReportingJob { protected final AggregatorReport report; - public ReportingJob(AggregatorReport report) { + protected ReportingJob(AggregatorReport report) { this.report = report; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index 65e7805d8..bab44a3b1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -25,7 +25,7 @@ public class MDStoreActionNode { NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK } - public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; + public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; @@ -70,7 +70,7 @@ public class MDStoreActionNode { if (StringUtils.isBlank(hdfsuri)) { throw new IllegalArgumentException("missing or empty argument namenode"); } - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { @@ -94,7 +94,7 @@ public class MDStoreActionNode { break; } case ROLLBACK: { - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 23ee3e2c6..d0872da1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -116,7 +116,7 @@ public class CollectorWorker extends ReportingJob { final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) - .get(); + .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); switch (plugin) { case mdstore_mongodb_dump: diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index f6fdc266e..f1f74b09e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob { totalItems.add(1); try { SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Node node = document.selectSingleNode(xpath); final String originalIdentifier = node.getText(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java index a61e2032c..8493a3436 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java @@ -32,7 +32,7 @@ public class HttpConnector2 { private String responseType = null; - private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; public HttpConnector2() { this(new HttpClientParams()); @@ -120,7 +120,7 @@ public class HttpConnector2 { if (is3xx(urlConn.getResponseCode())) { // REDIRECTS final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); - log.info(String.format("The requested url has been moved to %s", newUrl)); + log.info("The requested url has been moved to {}", newUrl); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -140,14 +140,14 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - requestUrl + " - waiting and repeating request after suggested retry-after " - + retryAfter + " sec."); + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); backoffAndSleep(retryAfter * 1000); } else { log .warn( - requestUrl + " - waiting and repeating request after default delay of " - + getClientParams().getRetryDelay() + " sec."); + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); @@ -181,12 +181,12 @@ public class HttpConnector2 { } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: " + urlConn.getResponseMessage()); + log.debug("StatusCode: {}", urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: " + e.getKey() + " - value: " + v); + log.debug(" key: {} - value: {}", e.getKey(), v); } } } @@ -204,7 +204,7 @@ public class HttpConnector2 { private int obtainRetryAfter(final Map> headerMap) { for (String key : headerMap.keySet()) { - if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index a27314983..549c59720 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -11,8 +11,6 @@ import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientURI; import com.mongodb.client.MongoCollection; import eu.dnetlib.dhp.aggregation.common.AggregatorReport; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index 3199af5b7..ec5bab448 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public static final String PATH_PARAM = "path"; public static final String BODY_JSONPATH = "$.body"; - public FileSystem fileSystem; + private final FileSystem fileSystem; public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { this.fileSystem = fileSystem; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 75dd746ea..331dee6b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.io.IOException; -import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; @@ -16,7 +15,6 @@ import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +28,8 @@ public class OaiIterator implements Iterator { private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); - private final static String REPORT_PREFIX = "oai:"; + private static final String REPORT_PREFIX = "oai:"; + public static final String UTF_8 = "UTF-8"; private final Queue queue = new PriorityBlockingQueue<>(); @@ -68,7 +67,7 @@ public class OaiIterator implements Iterator { try { this.token = firstPage(); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } } @@ -90,7 +89,7 @@ public class OaiIterator implements Iterator { try { token = otherPages(token); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } return res; @@ -99,23 +98,24 @@ public class OaiIterator implements Iterator { @Override public void remove() { + throw new UnsupportedOperationException(); } private String firstPage() throws CollectorException { try { - String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); + String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8); if (set != null && !set.isEmpty()) { - url += "&set=" + URLEncoder.encode(set, "UTF-8"); + url += "&set=" + URLEncoder.encode(set, UTF_8); } if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); + url += "&from=" + URLEncoder.encode(fromDate, UTF_8); } if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); + url += "&until=" + URLEncoder.encode(untilDate, UTF_8); } - log.info("Start harvesting using url: " + url); + log.info("Start harvesting using url: {}", url); return downloadPage(url); } catch (final UnsupportedEncodingException e) { @@ -143,7 +143,7 @@ public class OaiIterator implements Iterator { return downloadPage( baseUrl + "?verb=ListRecords&resumptionToken=" - + URLEncoder.encode(resumptionToken, "UTF-8")); + + URLEncoder.encode(resumptionToken, UTF_8)); } catch (final UnsupportedEncodingException e) { report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 764c21fc2..67ec22b46 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -131,7 +131,9 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { - transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); xpath = XPathFactory.newInstance().newXPath(); @@ -142,7 +144,7 @@ public class RestIterator implements Iterator { private void initQueue() { query = baseUrl + "?" + queryParams + querySize + queryFormat; - log.info("REST calls starting with " + query); + log.info("REST calls starting with {}", query); } private void disconnect() { @@ -174,7 +176,7 @@ public class RestIterator implements Iterator { try { query = downloadPage(query); } catch (CollectorException e) { - log.debug("CollectorPlugin.next()-Exception: " + e); + log.debug("CollectorPlugin.next()-Exception: {}", e); throw new RuntimeException(e); } } @@ -198,7 +200,7 @@ public class RestIterator implements Iterator { // check if cursor=* is initial set otherwise add it to the queryParam URL if (resumptionType.equalsIgnoreCase("deep-cursor")) { - log.debug("check resumptionType deep-cursor and check cursor=*?" + query); + log.debug("check resumptionType deep-cursor and check cursor=*?{}", query); if (!query.contains("&cursor=")) { query += "&cursor=*"; } @@ -208,16 +210,16 @@ public class RestIterator implements Iterator { log.info("requestig URL [{}]", query); URL qUrl = new URL(query); - log.debug("authMethod :" + authMethod); + log.debug("authMethod: {}", authMethod); if ("bearer".equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestMethod("GET"); theHttpInputStream = conn.getInputStream(); } else if (BASIC.equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); @@ -237,13 +239,13 @@ public class RestIterator implements Iterator { if (!(emptyXml).equalsIgnoreCase(resultXml)) { resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); - log.debug("nodeList.length: " + nodeList.getLength()); + log.debug("nodeList.length: {}", nodeList.getLength()); for (int i = 0; i < nodeList.getLength(); i++) { StringWriter sw = new StringWriter(); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); String toEnqueue = sw.toString(); if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { - log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); + log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml); } else { recordQueue.add(sw.toString()); } @@ -274,9 +276,9 @@ public class RestIterator implements Iterator { String[] resumptionKeyValue = arrayUrlArgStr.split("="); if (isInteger(resumptionKeyValue[1])) { urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); - log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); + log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize); } else { - log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); + log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]); } } } @@ -295,7 +297,7 @@ public class RestIterator implements Iterator { discoverResultSize += nodeList.getLength(); } } - log.info("discoverResultSize: {}", discoverResultSize); + log.info("discoverResultSize: {}", discoverResultSize); break; case "pagination": diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c7201a267..a01703675 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -67,10 +67,10 @@ public class TransformSparkJobNode { log.info("outputBasePath: {}", outputBasePath); final String isLookupUrl = parser.get("isLookupUrl"); - log.info(String.format("isLookupUrl: %s", isLookupUrl)); + log.info("isLookupUrl: {}", isLookupUrl); final String dateOfTransformation = parser.get("dateOfTransformation"); - log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); + log.info("dateOfTransformation: {}", dateOfTransformation); final Integer rpt = Optional .ofNullable(parser.get("recordsPerTask")) @@ -129,9 +129,9 @@ public class TransformSparkJobNode { .map((Function) x::call); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); - log.info("Transformed item " + ct.getProcessedItems().count()); - log.info("Total item " + ct.getTotalItems().count()); - log.info("Transformation Error item " + ct.getErrorItems().count()); + log.info("Transformed item {}", ct.getProcessedItems().count()); + log.info("Total item {}", ct.getTotalItems().count()); + log.info("Transformation Error item {}", ct.getErrorItems().count()); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); writeHdfsFile( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 096d0e289..e93f3b518 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') " + + + "where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + + private TransformationFactory() { + } public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -27,7 +33,7 @@ public class TransformationFactory { try { final String transformationPlugin = jobArgument.get("transformationPlugin"); - log.info("Transformation plugin required " + transformationPlugin); + log.info("Transformation plugin required {}", transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { final String transformationRuleId = jobArgument.get("transformationRuleId"); @@ -38,7 +44,7 @@ public class TransformationFactory { final String transformationRule = queryTransformationRuleFromIS( transformationRuleId, isLookupService); - final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); + final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, vocabularies); @@ -46,7 +52,6 @@ public class TransformationFactory { default: throw new DnetTransformationException( "transformation plugin does not exists for " + transformationPlugin); - } } catch (Throwable e) { @@ -55,9 +60,9 @@ public class TransformationFactory { } private static String queryTransformationRuleFromIS(final String transformationRuleId, - final ISLookUpService isLookUpService) throws Exception { + final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException { final String query = String.format(TRULE_XQUERY, transformationRuleId); - System.out.println("asking query to IS: " + query); + log.info("asking query to IS: {}", query); List result = isLookUpService.quickSearchProfile(query); if (result == null || result.isEmpty()) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 9da0747e6..3d57b966f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import java.io.Serializable; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import net.sf.saxon.s9api.*; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java index e3d588858..1aa549c09 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java @@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable { private static final Set particles = null; - public PersonCleaner() { - - } - private String normalize(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\s+-\\s+", "-"); - -// s = s.replaceAll("[\\W&&[^,-]]", " "); - -// System.out.println("class Person: s: " + s); - -// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replace("\\d", " "); s = s.replace("\\n", " "); @@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { s = s.replaceAll("\\s+", " "); if (s.contains(",")) { - // System.out.println("class Person: s: " + s); - String[] arr = s.split(","); if (arr.length == 1) { @@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } else if (arr.length > 1) { surname = splitTerms(arr[0]); firstname = splitTermsFirstName(arr[1]); -// System.out.println("class Person: surname: " + surname); -// System.out.println("class Person: firstname: " + firstname); - fullname.addAll(surname); fullname.addAll(firstname); } @@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini firstname = fullname.subList(0, lastInitialPosition + 1); - System.out.println("name: " + firstname); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI for (String term : fullname) { @@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } private List splitTerms(String s) { - if (particles == null) { - // particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); - } - List list = Lists.newArrayList(); for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { - // if (!particles.contains(part.toLowerCase())) { list.add(part); - - // } } return list; } @@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { public String getNormalisedFullname() { return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : Joiner.on(" ").join(fullname); - // return isAccurate() ? - // Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : - // Joiner.on(" ").join(fullname); } public List getCapitalSurname() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index 43291e5de..acf48ccc5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.transformation.xslt; -import java.io.ByteArrayInputStream; import java.io.Serializable; import java.io.StringWriter; import java.nio.charset.StandardCharsets; @@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction, Serializable { - public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; + public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform"; - private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; + private static final String DATASOURCE_ID_PARAM = "varDataSourceId"; - private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; + private static final String DATASOURCE_NAME_PARAM = "varOfficialName"; private final AggregationCounter aggregationCounter; @@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index 7200d2896..f2158748b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -67,7 +69,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOne() throws Exception { + void matchOne() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -98,7 +100,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -129,7 +131,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOneWithTwo() throws Exception { + void matchOneWithTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -160,7 +162,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -190,23 +192,21 @@ public class SparkAtomicActionScoreJobTest { List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); String tmp_influence = tmp_ds.get(0).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); tmp_influence = tmp_ds.get(1).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); - Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0))); + assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); } @Test - public void matchTwo() throws Exception { + void matchTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -237,7 +237,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 2); + assertEquals(2, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java index da5beecf9..dd7e1910f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -9,10 +9,10 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; -public class CSVParserTest { +class CSVParserTest { @Test - public void readProgrammeTest() throws Exception { + void readProgrammeTest() throws Exception { String programmecsv = IOUtils .toString( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index b7155bc3a..6c309acb3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -30,7 +30,7 @@ public class EXCELParserTest { } @Test - public void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, + void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, IllegalAccessException, InstantiationException { EXCELParser excelParser = new EXCELParser(); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 256dc0521..f128b5610 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -66,7 +66,7 @@ public class PrepareH2020ProgrammeTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { PrepareProgramme .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index 0db3485f5..f0f3532aa 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -66,7 +66,7 @@ public class PrepareProjectTest { } @Test - public void numberDistinctProjectTest() throws Exception { + void numberDistinctProjectTest() throws Exception { PrepareProjects .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 42e494681..a77dbace4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -69,7 +69,7 @@ public class SparkUpdateProjectTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { SparkAtomicActionJob .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java index f16901cb4..aa11f4ab5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.actionmanager.ror; import java.io.FileInputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -30,7 +32,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); final Organization org = GenerateRorActionSetJob.convertRorOrg(r); - System.out.println(mapper.writeValueAsString(org)); + final String s = mapper.writeValueAsString(org); + Assertions.assertTrue(StringUtils.isNotBlank(s)); + System.out.println(s); } @Test @@ -39,7 +43,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); for (final RorOrganization r : arr) { - GenerateRorActionSetJob.convertRorOrg(r); + Organization o = GenerateRorActionSetJob.convertRorOrg(r); + Assertions.assertNotNull(o); + Assertions.assertTrue(StringUtils.isNotBlank(o.getId())); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index afb6ae6a1..633a47379 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -97,7 +97,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + void testGenerateNativeStoreSparkJobRefresh() throws Exception { MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); @@ -125,7 +125,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + void testGenerateNativeStoreSparkJobIncremental() throws Exception { MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); @@ -155,7 +155,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(3) - public void testTransformSparkJob() throws Exception { + void testTransformSparkJob() throws Exception { setUpVocabulary(); @@ -206,7 +206,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testJSONSerialization() throws Exception { + void testJSONSerialization() throws Exception { final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json")); System.out.println("s = " + s); final ObjectMapper mapper = new ObjectMapper(); @@ -217,7 +217,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testGenerationMetadataRecord() throws Exception { + void testGenerationMetadataRecord() throws Exception { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); @@ -236,7 +236,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testEquals() throws IOException { + void testEquals() throws IOException { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final MetadataRecord record = GenerateNativeStoreSparkJob diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index efe925175..f2b873e10 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.collection.HttpClientParams; * @author js, Andreas Czerniak * */ -public class RestCollectorPluginTest { +class RestCollectorPluginTest { private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class); @@ -65,7 +65,7 @@ public class RestCollectorPluginTest { @Disabled @Test - public void test() throws CollectorException { + void test() throws CollectorException { AtomicInteger i = new AtomicInteger(0); final Stream stream = rcp.collect(api, new AggregatorReport()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java index b5ea5f069..f52f4632a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.collector.worker; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -11,10 +11,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.collection.ApiDescriptor; @Disabled -public class CollectorWorkerApplicationTests { +class CollectorWorkerApplicationTests { @Test - public void testCollectionOAI() throws Exception { + void testCollectionOAI() throws Exception { final ApiDescriptor api = new ApiDescriptor(); api.setId("oai"); api.setProtocol("oai"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 948a8f93b..7bd7baaea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -33,7 +33,7 @@ import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @ExtendWith(MockitoExtension.class) -public class TransformationJobTest extends AbstractVocabularyTest { +class TransformationJobTest extends AbstractVocabularyTest { private SparkConf sparkConf; @@ -49,7 +49,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Date cleaner") - public void testDateCleaner() throws Exception { + void testDateCleaner() throws Exception { DateCleaner dc = new DateCleaner(); assertEquals("1982-09-20", dc.clean("20/09/1982")); assertEquals("2002-09-20", dc.clean("20-09-2002")); @@ -60,7 +60,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator") - public void testTransformSaxonHE() throws Exception { + void testTransformSaxonHE() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -79,7 +79,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr") - public void testTransformITGv4Zenodo() throws Exception { + void testTransformITGv4Zenodo() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -97,7 +97,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE") - public void testTransformMostlyUsedScript() throws Exception { + void testTransformMostlyUsedScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl"; @@ -119,7 +119,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI") - public void testTransformRestScript() throws Exception { + void testTransformRestScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl"; @@ -140,7 +140,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)") - public void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { + void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { @@ -203,7 +203,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main") - public void transformTest(@TempDir Path testDir) throws Exception { + void transformTest(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 2caa66db4..38ffd28fe 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; +import java.sql.SQLException; import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -32,11 +33,11 @@ public class ReadBlacklistFromDB implements Closeable { private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " + + private static final String QUERY = "SELECT source_type, unnest(original_source_objects) as source, " + "target_type, unnest(original_target_objects) as target, " + "relationship FROM blacklist WHERE status = 'ACCEPTED'"; @@ -60,12 +61,12 @@ public class ReadBlacklistFromDB implements Closeable { dbPassword)) { log.info("Processing blacklist..."); - rbl.execute(query, rbl::processBlacklistEntry); + rbl.execute(QUERY, rbl::processBlacklistEntry); } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); @@ -99,7 +100,7 @@ public class ReadBlacklistFromDB implements Closeable { return Arrays.asList(direct, inverse); - } catch (final Exception e) { + } catch (final SQLException e) { throw new RuntimeException(e); } } @@ -112,12 +113,14 @@ public class ReadBlacklistFromDB implements Closeable { public ReadBlacklistFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { @@ -133,7 +136,7 @@ public class ReadBlacklistFromDB implements Closeable { try { writer.write(OBJECT_MAPPER.writeValueAsString(r)); writer.newLine(); - } catch (final Exception e) { + } catch (final IOException e) { throw new RuntimeException(e); } } diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 91bcb9d1c..38ef63d27 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -114,10 +114,8 @@ public class SparkRemoveBlacklistedRelationJob { .map((MapFunction, Relation>) c -> { Relation ir = c._1(); Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; - } + if (obl.isPresent() && ir.equals(obl.get())) { + return null; } return ir; }, Encoders.bean(Relation.class)) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 585848589..058ea271c 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -62,7 +62,7 @@ public class BlackListTest { } @Test - public void noRemoveTest() throws Exception { + void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -89,7 +89,7 @@ public class BlackListTest { } @Test - public void removeNoMergeMatchTest() throws Exception { + void removeNoMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -128,7 +128,7 @@ public class BlackListTest { } @Test - public void removeMergeMatchTest() throws Exception { + void removeMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 429eb7d11..584438d44 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -9,19 +9,24 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; +import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; +import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; public class EventFactory { - private final static String PRODUCER_ID = "OpenAIRE"; + private static final String PRODUCER_ID = "OpenAIRE"; - private final static String[] DATE_PATTERNS = { + private static final String[] DATE_PATTERNS = { "yyyy-MM-dd" }; + private EventFactory() { + } + public static Event newBrokerEvent(final UpdateInfo updateInfo) { final Event res = new Event(); @@ -61,7 +66,7 @@ public class EventFactory { map.setTargetResultId(target.getOpenaireId()); final List titles = target.getTitles(); - if (titles.size() > 0) { + if (!titles.isEmpty()) { map.setTargetResultTitle(titles.get(0)); } @@ -70,8 +75,12 @@ public class EventFactory { map.setTargetDateofacceptance(date); } - map.setTargetSubjects(target.getSubjects().stream().map(s -> s.getValue()).collect(Collectors.toList())); - map.setTargetAuthors(target.getCreators().stream().map(a -> a.getFullname()).collect(Collectors.toList())); + map + .setTargetSubjects( + target.getSubjects().stream().map(OaBrokerTypedValue::getValue).collect(Collectors.toList())); + map + .setTargetAuthors( + target.getCreators().stream().map(OaBrokerAuthor::getFullname).collect(Collectors.toList())); // PROVENANCE INFO map.setTrust(updateInfo.getTrust()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java index 89fc2e703..f0aa6491f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java @@ -10,15 +10,11 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.TypedColumn; import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; @@ -88,8 +84,7 @@ class CountAggregator extends Aggregator, Tuple2 merge(final Tuple2 arg0, final Tuple2 arg1) { - final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); - return new Tuple2<>(s, arg0._2 + arg1._2); + return doMerge(arg0, arg1); } @Override @@ -99,6 +94,10 @@ class CountAggregator extends Aggregator, Tuple2 reduce(final Tuple2 arg0, final Tuple2 arg1) { + return doMerge(arg0, arg1); + } + + private Tuple2 doMerge(final Tuple2 arg0, final Tuple2 arg1) { final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); return new Tuple2<>(s, arg0._2 + arg1._2); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java index 05ff2aa38..0fbc763e0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa; +import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -98,7 +99,6 @@ public class IndexEventSubsetJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -114,11 +114,11 @@ public class IndexEventSubsetJob { log.info("*** Deleting old events"); final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000); - log.info("*** Deleted events: " + message); + log.info("*** Deleted events: {}", message); } - private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 80549e1ce..e8ef5dd3e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -33,11 +33,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.ConditionParams; -import eu.dnetlib.dhp.broker.model.Event; -import eu.dnetlib.dhp.broker.model.MappedFields; -import eu.dnetlib.dhp.broker.model.Notification; -import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.model.*; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; @@ -89,9 +85,9 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - log.info("Number of subscriptions: " + subscriptions.size()); + log.info("Number of subscriptions: {}", subscriptions.size()); - if (subscriptions.size() > 0) { + if (!subscriptions.isEmpty()) { final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils @@ -106,7 +102,6 @@ public class IndexNotificationsJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -122,7 +117,7 @@ public class IndexNotificationsJob { log.info("*** Deleting old notifications"); final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: " + message); + log.info("*** Deleted notifications: {}", message); log.info("*** sendNotifications (emails, ...)"); sendNotifications(brokerApiBaseUrl, startTime - 1000); @@ -174,28 +169,28 @@ public class IndexNotificationsJob { return false; } - if (conditions.containsKey("targetDateofacceptance") && !conditions + if (conditions.containsKey("targetDateofacceptance") && conditions .get("targetDateofacceptance") .stream() - .anyMatch( + .noneMatch( c -> SubscriptionUtils .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } if (conditions.containsKey("targetResultTitle") - && !conditions + && conditions .get("targetResultTitle") .stream() - .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { return false; } if (conditions.containsKey("targetAuthors") - && !conditions + && conditions .get("targetAuthors") .stream() - .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { return false; } @@ -207,7 +202,7 @@ public class IndexNotificationsJob { } - private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { + private static List listSubscriptions(final String brokerApiBaseUrl) throws IOException { final String url = brokerApiBaseUrl + "/api/subscriptions"; final HttpGet req = new HttpGet(url); @@ -222,7 +217,7 @@ public class IndexNotificationsJob { } } - private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java index 380a689e4..0c74d8a6d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java @@ -7,6 +7,7 @@ import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; @@ -61,7 +62,7 @@ public class IndexOnESJob { final JavaRDD inputRdd = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map(IndexOnESJob::eventAsJsonString, Encoders.STRING()) + .map((MapFunction) IndexOnESJob::eventAsJsonString, Encoders.STRING()) .javaRDD(); final Map esCfg = new HashMap<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index e061c0d3b..b5c891bb8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -77,9 +77,8 @@ public class PartitionEventsByDsIdJob { } log.info("validOpendoarIds: {}", validOpendoarIds); - runWithSparkSession(conf, isSparkSessionManaged, spark -> { - - ClusterUtils + runWithSparkSession( + conf, isSparkSessionManaged, spark -> ClusterUtils .readPath(spark, eventsPath, Event.class) .filter((FilterFunction) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) .filter((FilterFunction) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) @@ -92,9 +91,7 @@ public class PartitionEventsByDsIdJob { .partitionBy("group") .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(partitionPath); - - }); + .json(partitionPath)); renameSubDirs(partitionPath); } @@ -102,14 +99,14 @@ public class PartitionEventsByDsIdJob { private static void renameSubDirs(final String path) throws IOException { final FileSystem fs = FileSystem.get(new Configuration()); - log.info("** Renaming subdirs of " + path); + log.info("** Renaming subdirs of {}", path); for (final FileStatus fileStatus : fs.listStatus(new Path(path))) { if (fileStatus.isDirectory()) { final Path oldPath = fileStatus.getPath(); final String oldName = oldPath.getName(); if (oldName.contains("=")) { final Path newPath = new Path(path + "/" + StringUtils.substringAfter(oldName, "=")); - log.info(" * " + oldPath.getName() + " -> " + newPath.getName()); + log.info(" * {} -> {}", oldPath.getName(), newPath.getName()); fs.rename(oldPath, newPath); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java index 61ab5e250..2a247b7aa 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java @@ -105,7 +105,7 @@ public class PrepareRelatedDatasourcesJob { .filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getId())) .filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference()) .map( - (MapFunction) r -> DatasourceRelationsAccumulator.calculateTuples(r), + (MapFunction) DatasourceRelationsAccumulator::calculateTuples, Encoders.bean(DatasourceRelationsAccumulator.class)) .flatMap( (FlatMapFunction>) acc -> acc diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index fba82aa8c..87fed7db7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -26,7 +26,7 @@ public abstract class UpdateMatcher { private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; - public UpdateMatcher(final int maxNumber, final Function topicFunction, + protected UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, final Function highlightToStringFunction) { this.maxNumber = maxNumber; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java index 2f73a2448..88ad48178 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingDataset extends UpdateMatcher { - public AbstractEnrichMissingDataset(final Topic topic) { + protected AbstractEnrichMissingDataset(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedDataset::getOpenaireId); } protected abstract boolean filterByType(String relType); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index ab2735f2a..440602772 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -15,7 +15,7 @@ public class EnrichMissingProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MISSING_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 85086a6df..2e523da2f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -18,7 +18,7 @@ public class EnrichMoreProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MORE_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override @@ -32,7 +32,7 @@ public class EnrichMoreProject extends UpdateMatcher { final Set existingProjects = target .getProjects() .stream() - .map(p -> p.getOpenaireId()) + .map(OaBrokerProject::getOpenaireId) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index 7ba3e5e02..a709eea30 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingPublication extends UpdateMatcher { - public AbstractEnrichMissingPublication(final Topic topic) { + protected AbstractEnrichMissingPublication(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getPublications().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedPublication::getOpenaireId); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index a638024bc..a75666027 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -16,7 +16,7 @@ public class EnrichMissingSoftware super(10, s -> Topic.ENRICH_MISSING_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index a6cd34359..ec340b42f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -18,7 +18,7 @@ public class EnrichMoreSoftware extends UpdateMatcher { super(10, s -> Topic.ENRICH_MORE_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index e834d1dde..125eac862 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -20,7 +20,7 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher { super(40, aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID, (p, aut) -> p.getCreators().add(aut), - aut -> aut.getOrcid()); + OaBrokerAuthor::getOrcid); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 4e4003890..f32cec90d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -23,7 +23,7 @@ public class EnrichMissingPid extends UpdateMatcher { protected List findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) { - if (target.getPids().size() > 0) { + if (!target.getPids().isEmpty()) { return Arrays.asList(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index cb3ea5464..f07bbd52f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -35,7 +35,7 @@ public class EnrichMissingSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMissingSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMissingSubject extends UpdateMatcher { final Set existingSubject = target .getSubjects() .stream() - .map(s -> subjectAsString(s)) + .map(EnrichMissingSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 46f6fa80c..585531095 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -33,7 +33,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { .getInstances() .stream() .filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS)) - .map(i -> i.getUrl()) + .map(OaBrokerInstance::getUrl) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 609437b9d..a98b96b99 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -18,7 +18,7 @@ public class EnrichMorePid extends UpdateMatcher { super(20, pid -> Topic.ENRICH_MORE_PID, (p, pid) -> p.getPids().add(pid), - pid -> pidAsString(pid)); + EnrichMorePid::pidAsString); } @Override @@ -32,7 +32,7 @@ public class EnrichMorePid extends UpdateMatcher { final Set existingPids = target .getPids() .stream() - .map(pid -> pidAsString(pid)) + .map(EnrichMorePid::pidAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 1f6edf96e..b62b509c7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -35,7 +35,7 @@ public class EnrichMoreSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMoreSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMoreSubject extends UpdateMatcher { final Set existingSubjects = target .getSubjects() .stream() - .map(pid -> subjectAsString(pid)) + .map(EnrichMoreSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 2055a014e..790ca4e61 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -12,6 +12,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class BrokerConstants { + private BrokerConstants() { + } + public static final String OPEN_ACCESS = "OPEN"; public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index 7c4ca1d22..2e9c03990 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -24,6 +24,9 @@ public class ClusterUtils { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private ClusterUtils() { + } + public static void createDirIfMissing(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 6f0a52244..bc37203d3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -13,8 +14,6 @@ import org.dom4j.DocumentHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Function; - import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerExternalReference; import eu.dnetlib.broker.objects.OaBrokerInstance; @@ -46,6 +45,9 @@ public class ConversionUtils { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); + private ConversionUtils() { + } + public static List oafInstanceToBrokerInstances(final Instance i) { if (i == null) { return new ArrayList<>(); @@ -69,7 +71,7 @@ public class ConversionUtils { return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } - public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { + public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; } @@ -100,7 +102,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { + public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) { if (result == null) { return null; } @@ -142,12 +144,12 @@ public class ConversionUtils { final String pids = author.getPid() != null ? author .getPid() .stream() - .filter(pid -> pid != null) + .filter(Objects::nonNull) .filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) - .map(pid -> pid.getValue()) - .map(pid -> cleanOrcid(pid)) + .map(StructuredProperty::getValue) + .map(ConversionUtils::cleanOrcid) .filter(StringUtils::isNotBlank) .findFirst() .orElse(null) : null; @@ -187,7 +189,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { + public static OaBrokerProject oafProjectToBrokerProject(final Project p) { if (p == null) { return null; } @@ -206,14 +208,14 @@ public class ConversionUtils { res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction")); res.setFundingProgram(fdoc.valueOf("//funding_level_0/name")); } catch (final DocumentException e) { - log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree); + log.error("Error in record {}: invalid fundingtree: {}", p.getId(), ftree); } } return res; } - public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { + public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { if (sw == null) { return null; } @@ -228,7 +230,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { + public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { if (ds == null) { return null; } @@ -241,7 +243,7 @@ public class ConversionUtils { } private static String first(final List list) { - return list != null && list.size() > 0 ? list.get(0) : null; + return list != null && !list.isEmpty() ? list.get(0) : null; } private static String kvValue(final KeyValue kv) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java index c693be93c..658a42ac1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java @@ -10,6 +10,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple3; @@ -39,7 +41,7 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set collectedFromSet = r .getCollectedfrom() .stream() - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .collect(Collectors.toSet()); @@ -47,10 +49,10 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set hostedBySet = r .getInstance() .stream() - .map(i -> i.getHostedby()) + .map(Instance::getHostedby) .filter(Objects::nonNull) .filter(kv -> !StringUtils.equalsIgnoreCase(kv.getValue(), "Unknown Repository")) - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .filter(id -> !collectedFromSet.contains(id)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java index 103751f95..b2214e07e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java @@ -41,8 +41,6 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; public class EventFinder { - private static final Logger log = LoggerFactory.getLogger(EventFinder.class); - private static final List> matchers = new ArrayList<>(); static { matchers.add(new EnrichMissingAbstract()); @@ -72,6 +70,9 @@ public class EventFinder { matchers.add(new EnrichMissingDatasetIsSupplementedBy()); } + private EventFinder() { + } + public static EventGroup generateEvents(final ResultGroup results, final Set dsIdWhitelist, final Set dsIdBlacklist, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java index adb1c753b..cf3562193 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java @@ -12,6 +12,9 @@ public class SubscriptionUtils { private static final long ONE_DAY = 86_400_000; + private SubscriptionUtils() { + } + public static boolean verifyListSimilar(final List list, final String value) { return list.stream().anyMatch(s -> verifySimilar(s, value)); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 72fe1b204..a49801f32 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -30,7 +30,9 @@ public class TrustUtils { } catch (final IOException e) { log.error("Error loading dedupConfig, e"); } + } + private TrustUtils() { } protected static float calculateTrust(final OaBrokerMainEntity r1, final OaBrokerMainEntity r2) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 5a9cb5e09..d29414e52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -88,14 +88,14 @@ public final class UpdateInfo { .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getName()) + .map(OaBrokerRelatedDatasource::getName) .findFirst() .orElse(""); final String provType = getSource() .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getType()) + .map(OaBrokerRelatedDatasource::getType) .findFirst() .orElse(""); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 8fa95abe5..45bfc785f 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; @@ -72,7 +73,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test @@ -127,7 +128,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 77a19af4c..550ded9f4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; @@ -32,7 +33,7 @@ class EnrichMissingPublicationDateTest { final OaBrokerMainEntity target = new OaBrokerMainEntity(); source.setPublicationdate("2018"); final List list = matcher.findDifferences(source, target); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 974baa28b..a8bc03e31 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,67 +9,67 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -public class TrustUtilsTest { +class TrustUtilsTest { private static final double THRESHOLD = 0.95; @Test - public void rescaleTest_1() { + void rescaleTest_1() { verifyValue(-0.3, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_2() { + void rescaleTest_2() { verifyValue(0.0, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_3() { + void rescaleTest_3() { verifyValue(0.5, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_4() { + void rescaleTest_4() { verifyValue(0.95, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_5() { + void rescaleTest_5() { verifyValue(0.96, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_6() { + void rescaleTest_6() { verifyValue(0.97, 0.3f); } @Test - public void rescaleTest_7() { + void rescaleTest_7() { verifyValue(0.98, 0.45f); } @Test - public void rescaleTest_8() { + void rescaleTest_8() { verifyValue(0.99, 0.6f); } @Test - public void rescaleTest_9() { + void rescaleTest_9() { verifyValue(1.00, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_10() { + void rescaleTest_10() { verifyValue(1.01, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_11() { + void rescaleTest_11() { verifyValue(2.00, BrokerConstants.MAX_TRUST); } @Test - public void test() throws Exception { + void test() { final OaBrokerMainEntity r1 = new OaBrokerMainEntity(); r1.getTitles().add("D-NET Service Package: Data Import"); r1.getPids().add(new OaBrokerTypedValue("doi", "123")); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 647a1b9c8..6a9b21b00 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -20,6 +20,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -43,23 +44,26 @@ abstract class AbstractSparkAction implements Serializable { protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - public ArgumentApplicationParser parser; // parameters for the spark action - public SparkSession spark; // the spark session + public final ArgumentApplicationParser parser; // parameters for the spark action + public final SparkSession spark; // the spark session - public AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { + protected AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { this.parser = parser; this.spark = spark; } public List getConfigurations(ISLookUpService isLookUpService, String orchestrator) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); @@ -93,7 +97,7 @@ abstract class AbstractSparkAction implements Serializable { } abstract void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException; + throws DocumentException, IOException, ISLookUpException, SAXException; protected static SparkSession getSparkSession(SparkConf conf) { return SparkSession.builder().config(conf).getOrCreate(); @@ -139,9 +143,7 @@ abstract class AbstractSparkAction implements Serializable { c -> c .stream() .filter(Objects::nonNull) - .filter(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())) - .findFirst() - .isPresent()) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) .orElse(false); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java index 558c7c440..9d767c4d2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java @@ -23,6 +23,9 @@ public class DatePicker { private static final int YEAR_LB = 1300; private static final int YEAR_UB = Year.now().getValue() + 5; + private DatePicker() { + } + public static Field pick(final Collection dateofacceptance) { final Map frequencies = dateofacceptance @@ -61,7 +64,7 @@ public class DatePicker { .entrySet() .stream() .filter(e -> e.getValue() >= acceptThreshold) - .map(e -> e.getKey()) + .map(Map.Entry::getKey) .collect(Collectors.toList()); // cannot find strong majority diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 5ba6f6e6d..d65853aff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -10,14 +10,11 @@ import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -25,11 +22,12 @@ import scala.Tuple2; public class DedupRecordFactory { - private static final Logger log = LoggerFactory.getLogger(DedupRecordFactory.class); - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private DedupRecordFactory() { + } + public static Dataset createDedupRecord( final SparkSession spark, final DataInfo dataInfo, @@ -67,7 +65,7 @@ public class DedupRecordFactory { value._1()._1(), value._2()._2()), Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) .groupByKey( - (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) + (MapFunction, String>) Tuple2::_1, Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, values) -> entityMerger(key, values, ts, dataInfo, clazz), @@ -91,7 +89,7 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size() > 0) + if (r1.getAuthor() != null && !r1.getAuthor().isEmpty()) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index 5806e9fa4..d79d24653 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -10,6 +10,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Sets; @@ -25,6 +26,9 @@ public class DedupUtility { public static final String OPENORGS_ID_PREFIX = "openorgs____"; public static final String CORDA_ID_PREFIX = "corda"; + private DedupUtility() { + } + public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -92,14 +96,16 @@ public class DedupUtility { } public static List getConfigurations(String isLookUpUrl, String orchestrator) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); final List configurations = new ArrayList<>(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 5506b5470..2cdc22153 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -33,9 +34,11 @@ public class DispatchEntitiesSparkJob { String jsonConfiguration = IOUtils .toString( - DispatchEntitiesSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")); + Objects + .requireNonNull( + DispatchEntitiesSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcf..a19f86380 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -42,7 +42,7 @@ public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private final static String ID_JPATH = "$.id"; + private static final String ID_JPATH = "$.id"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -92,7 +92,7 @@ public class GroupEntitiesSparkJob { spark .read() .textFile(toSeq(listEntityPaths(inputPath, sc))) - .map((MapFunction) s -> parseOaf(s), Encoders.kryo(OafEntity.class)) + .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) .agg(aggregator) @@ -188,7 +188,7 @@ public class GroupEntitiesSparkJob { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (IOException e) { - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index dd9b16790..81cd30f88 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -15,13 +15,13 @@ public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) public static String generate(List> pids, String defaultID) { - if (pids == null || pids.size() == 0) + if (pids == null || pids.isEmpty()) return defaultID; Identifier bp = pids .stream() .min(Identifier::compareTo) - .get(); + .orElseThrow(() -> new IllegalStateException("unable to generate id")); String prefix = substringBefore(bp.getOriginalID(), "|"); String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java index 1e13485e5..c9c9dd8fe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java @@ -9,7 +9,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -63,7 +63,7 @@ public class SparkBlockStats extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java index 0aaa1e662..93027e99a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java @@ -6,7 +6,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,8 +16,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -78,7 +75,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction { saveParquet(rawRels, outputPath, SaveMode.Append); - log.info("Copied " + rawRels.count() + " Similarity Relations"); + log.info("Copied {} Similarity Relations", rawRels.count()); } private boolean filterOpenorgsRels(Relation rel) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9ece43891..bf0b7f687 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -2,34 +2,21 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; -import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.*; -import org.apache.spark.sql.Dataset; -import org.dom4j.DocumentException; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java index b41507e95..6989ec54b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java @@ -13,6 +13,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.EntityType; @@ -54,7 +55,7 @@ public class SparkCreateDedupRecord extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String isLookUpUrl = parser.get("isLookUpUrl"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index bfc605039..95e3dff28 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -24,6 +24,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; @@ -76,7 +77,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String workingPath = parser.get("workingPath"); @@ -161,11 +162,11 @@ public class SparkCreateMergeRels extends AbstractSparkAction { private ConnectedComponent generateID(String key, Iterator> values) { - List> identifiers = Lists.newArrayList(values).stream().map(v -> { - T entity = v._2(); - Identifier identifier = Identifier.newInstance(entity); - return identifier; - }).collect(Collectors.toList()); + List> identifiers = Lists + .newArrayList(values) + .stream() + .map(v -> Identifier.newInstance(v._2())) + .collect(Collectors.toList()); String rootID = IdGenerator.generate(identifiers, key); @@ -235,7 +236,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { info.setProvenanceaction(provenanceAction); // TODO calculate the trust value based on the similarity score of the elements in the CC - // info.setTrust(); r.setDataInfo(info); return r; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java index df3db7add..8e5e9fd69 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java @@ -18,9 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index 884967364..f89f634b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -7,7 +7,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -26,8 +26,6 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.FieldListImpl; -import eu.dnetlib.pace.model.FieldValueImpl; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; @@ -56,7 +54,7 @@ public class SparkCreateSimRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); @@ -110,9 +108,6 @@ public class SparkCreateSimRels extends AbstractSparkAction { Encoders.bean(Relation.class)); saveParquet(simRels, outputPath, SaveMode.Overwrite); - - log.info("Generated " + simRels.count() + " Similarity Relations"); - } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java index 657d5a832..d12048b02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java @@ -6,10 +6,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FilterFunction; @@ -23,9 +19,9 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -84,8 +80,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); + final String organizazion = ModelSupport.getMainType(EntityType.organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organizazion); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organizazion); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath); @@ -115,7 +112,7 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(r -> filterRels(r, "organization")) + .filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization))) // take the worst id of the diffrel: .mapToPair(rel -> { if (DedupUtility.compareOpenOrgIds(rel.getSource(), rel.getTarget()) > 0) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java index 08b39793e..61325ab50 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -32,6 +33,7 @@ import scala.Tuple3; public class SparkPrepareOrgRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkPrepareOrgRels.class); + public static final String GROUP_PREFIX = "group::"; public SparkPrepareOrgRels(ArgumentApplicationParser parser, SparkSession spark) { super(parser, spark); @@ -41,7 +43,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser.parseArgument(args); @@ -81,8 +83,9 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset relations = createRelations(spark, mergeRelPath, relationPath, entityPath); @@ -168,7 +171,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .map(g -> Lists.newArrayList(g._2())) .filter(l -> l.size() > 1) .flatMap(l -> { - String groupId = "group::" + UUID.randomUUID(); + String groupId = GROUP_PREFIX + UUID.randomUUID(); List ids = sortIds(l); // sort IDs by type List, String>> rels = new ArrayList<>(); String source = ids.get(0); @@ -192,7 +195,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .collect(Collectors.toList()))) // : take only relations with only the group_id, it // means they are correct. If the diffRel is present the relation has to be removed - .filter(g -> g._2().size() == 1 && g._2().get(0).contains("group::")) + .filter(g -> g._2().size() == 1 && g._2().get(0).contains(GROUP_PREFIX)) .map( t -> new Tuple3<>( t._1().split("@@@")[0], @@ -255,7 +258,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { // Sort IDs basing on the type. Priority: 1) openorgs, 2)corda, 3)alphabetic public static List sortIds(List ids) { - ids.sort((o1, o2) -> DedupUtility.compareOpenOrgIds(o1, o2)); + ids.sort(DedupUtility::compareOpenOrgIds); return ids; } @@ -289,9 +292,10 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { List> rels = new ArrayList<>(); for (String id1 : g._2()) { for (String id2 : g._2()) { - if (!id1.equals(id2)) - if (id1.contains(DedupUtility.OPENORGS_ID_PREFIX) && !id2.contains("openorgsmesh")) - rels.add(new Tuple2<>(id1, id2)); + if (!id1.equals(id2) && id1.contains(DedupUtility.OPENORGS_ID_PREFIX) + && !id2.contains("openorgsmesh")) { + rels.add(new Tuple2<>(id1, id2)); + } } } return rels.iterator(); @@ -310,7 +314,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "", r._2()._2().getCollectedfrom().get(0).getValue(), - "group::" + r._1()._1(), + GROUP_PREFIX + r._1()._1(), structuredPropertyListToString(r._2()._2().getPid()), parseECField(r._2()._2().getEclegalbody()), parseECField(r._2()._2().getEclegalperson()), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 220b0f483..0fa41bd6d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -40,7 +40,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPropagateRelation.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); @@ -113,7 +113,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { .join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), Encoders.STRING()) .agg(new RelationAggregator().toColumn()) - .map((MapFunction, Relation>) t -> t._2(), Encoders.bean(Relation.class)); + .map((MapFunction, Relation>) Tuple2::_2, Encoders.bean(Relation.class)); } // redirect the relations to the dedupID @@ -163,7 +163,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { private FilterFunction getRelationFilterFunction() { return r -> StringUtils.isNotBlank(r.getSource()) || StringUtils.isNotBlank(r.getTarget()) || - StringUtils.isNotBlank(r.getRelClass()) || + StringUtils.isNotBlank(r.getRelType()) || StringUtils.isNotBlank(r.getSubRelType()) || StringUtils.isNotBlank(r.getRelClass()); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index fdef7f77d..49021ab58 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; +import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -13,7 +14,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -26,8 +26,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.util.MapDocumentUtil; @@ -72,83 +74,76 @@ public class SparkUpdateEntity extends AbstractSparkAction { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - // for each entity - ModelSupport.entityTypes - .forEach( - (type, clazz) -> { - final String outputPath = dedupGraphPath + "/" + type; - removeOutputDir(spark, outputPath); - final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); - if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { - JavaRDD sourceEntity = sc - .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + final EntityType type = e.getKey(); + final Class clazz = e.getValue(); + final String outputPath = dedupGraphPath + "/" + type; + removeOutputDir(spark, outputPath); + final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); + if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { + JavaRDD sourceEntity = sc + .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); - if (mergeRelExists(workingPath, type.toString())) { + if (mergeRelExists(workingPath, type.toString())) { - final String mergeRelPath = DedupUtility - .createMergeRelPath(workingPath, "*", type.toString()); - final String dedupRecordPath = DedupUtility - .createDedupRecordPath(workingPath, "*", type.toString()); + final String mergeRelPath = DedupUtility + .createMergeRelPath(workingPath, "*", type.toString()); + final String dedupRecordPath = DedupUtility + .createDedupRecordPath(workingPath, "*", type.toString()); - final Dataset rel = spark - .read() - .load(mergeRelPath) - .as(Encoders.bean(Relation.class)); + final Dataset rel = spark + .read() + .load(mergeRelPath) + .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = rel - .where("relClass == 'merges'") - .where("source != target") - .select(rel.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); + final JavaPairRDD mergedIds = rel + .where("relClass == 'merges'") + .where("source != target") + .select(rel.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); - JavaPairRDD entitiesWithId = sourceEntity - .mapToPair( - (PairFunction) s -> new Tuple2<>( - MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); - if (type == EntityType.organization) // exclude root records from organizations - entitiesWithId = excludeRootOrgs(entitiesWithId, rel); + JavaPairRDD entitiesWithId = sourceEntity + .mapToPair( + (PairFunction) s -> new Tuple2<>( + MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); + if (type == EntityType.organization) // exclude root records from organizations + entitiesWithId = excludeRootOrgs(entitiesWithId, rel); - JavaRDD map = entitiesWithId - .leftOuterJoin(mergedIds) - .map(k -> { - if (k._2()._2().isPresent()) { - return updateDeletedByInference(k._2()._1(), clazz); - } - return k._2()._1(); - }); + JavaRDD map = entitiesWithId + .leftOuterJoin(mergedIds) + .map(k -> { + if (k._2()._2().isPresent()) { + return updateDeletedByInference(k._2()._1(), clazz); + } + return k._2()._1(); + }); - sourceEntity = map.union(sc.textFile(dedupRecordPath)); - - } - - sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); - } - }); + sourceEntity = map.union(sc.textFile(dedupRecordPath)); + } + sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); + } + } } - public boolean mergeRelExists(String basePath, String entity) { + public boolean mergeRelExists(String basePath, String entity) throws IOException { boolean result = false; - try { - FileSystem fileSystem = FileSystem.get(new Configuration()); - FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); + FileSystem fileSystem = FileSystem.get(new Configuration()); + FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); - for (FileStatus fs : fileStatuses) { - if (fs.isDirectory()) - if (fileSystem - .exists( - new Path(DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)))) - result = true; + for (FileStatus fs : fileStatuses) { + final Path mergeRelPath = new Path( + DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)); + if (fs.isDirectory() && fileSystem.exists(mergeRelPath)) { + result = true; } - - return result; - } catch (IOException e) { - throw new RuntimeException(e); } + + return result; } private static String updateDeletedByInference( diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java index 3a986a9dd..3e564052e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java @@ -3,29 +3,15 @@ package eu.dnetlib.dhp.oa.dedup.graph; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang.StringUtils; -import org.apache.spark.api.java.function.MapFunction; import org.codehaus.jackson.annotate.JsonIgnore; -import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.DedupUtility; -import eu.dnetlib.dhp.oa.dedup.IdGenerator; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.PaceException; public class ConnectedComponent implements Serializable { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index e821d7ef5..a25a853ef 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; import eu.dnetlib.dhp.schema.oaf.utils.PidType; -public class Identifier implements Serializable, Comparable { +public class Identifier implements Serializable, Comparable> { public static final String DATE_FORMAT = "yyyy-MM-dd"; public static final String BASE_DATE = "2000-01-01"; @@ -29,8 +29,8 @@ public class Identifier implements Serializable, Comparable // cached date value private Date date = null; - public static Identifier newInstance(T entity) { - return new Identifier(entity); + public static Identifier newInstance(T entity) { + return new Identifier<>(entity); } public Identifier(T entity) { @@ -88,7 +88,7 @@ public class Identifier implements Serializable, Comparable } @Override - public int compareTo(Identifier i) { + public int compareTo(Identifier i) { // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) // alphabetical order of the originalID diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java index 7c58c375a..daea29a07 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java @@ -10,12 +10,12 @@ import org.junit.jupiter.api.Test; import com.clearspring.analytics.util.Lists; -public class DatePickerTest { +class DatePickerTest { Collection dates = Lists.newArrayList(); @Test - public void testPickISO() { + void testPickISO() { dates.add("2016-01-01T12:00:00Z"); dates.add("2016-06-16T12:00:00Z"); dates.add("2020-01-01T12:00:00Z"); @@ -24,7 +24,7 @@ public class DatePickerTest { } @Test - public void testPickSimple() { + void testPickSimple() { dates.add("2016-01-01"); dates.add("2016-06-16"); dates.add("2020-01-01"); @@ -33,7 +33,7 @@ public class DatePickerTest { } @Test - public void testPickFrequent() { + void testPickFrequent() { dates.add("2016-02-01"); dates.add("2016-02-01"); dates.add("2016-02-01"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 80154fbb7..e86f91f99 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class EntityMergerTest implements Serializable { +class EntityMergerTest implements Serializable { private List> publications; private List> publications2; @@ -54,7 +54,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void softwareMergerTest() throws InstantiationException, IllegalAccessException { + void softwareMergerTest() throws InstantiationException, IllegalAccessException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); @@ -69,7 +69,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + void publicationMergerTest() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); @@ -125,7 +125,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); @@ -137,7 +137,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest3() throws InstantiationException, IllegalAccessException { + void publicationMergerTest3() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); @@ -147,7 +147,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); @@ -157,7 +157,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { System.out .println( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java index 1a279fac7..2d6637882 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java @@ -56,7 +56,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest1() { + void generateIdTest1() { String id1 = IdGenerator.generate(bestIds, "50|defaultID"); System.out @@ -66,7 +66,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest2() { + void generateIdTest2() { String id1 = IdGenerator.generate(bestIds2, "50|defaultID"); String id2 = IdGenerator.generate(bestIds3, "50|defaultID"); @@ -82,7 +82,7 @@ public class IdGeneratorTest { } @Test - public void generateIdOrganizationTest() { + void generateIdOrganizationTest() { String id1 = IdGenerator.generate(bestIdsOrg, "20|defaultID"); assertEquals("20|openorgs____::599c15a70fcb03be6ba08f75f14d6076", id1); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bf4913056..1860a3ff0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -148,7 +147,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -203,7 +202,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(2) - public void cutMergeRelsTest() throws Exception { + void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -299,7 +298,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -355,7 +354,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(4) - public void createDedupRecordTest() throws Exception { + void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -402,7 +401,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(5) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -518,7 +517,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(6) - public void propagateRelationTest() throws Exception { + void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -568,7 +567,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(7) - public void testRelations() throws Exception { + void testRelations() throws Exception { testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 97cfab118..9312d83b1 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -67,7 +67,7 @@ public class SparkOpenorgsDedupTest implements Serializable { public static void cleanUp() throws IOException, URISyntaxException { testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) + .get(SparkOpenorgsDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) .toFile() .getAbsolutePath(); testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") @@ -101,7 +101,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_openorgs.xml"))); @@ -110,14 +110,14 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); } @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -148,7 +148,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(2) - public void copyOpenorgsSimRels() throws Exception { + void copyOpenorgsSimRels() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( @@ -177,7 +177,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -230,11 +230,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(4) - public void prepareOrgRelsTest() throws Exception { + void prepareOrgRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser @@ -313,11 +313,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(5) - public void prepareNewOrgsTest() throws Exception { + void prepareNewOrgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareNewOrgs.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareNewOrgs_parameters.json"))); parser diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 606dd9e5b..2349ffebe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.Mockito.lenient; import java.io.File; @@ -12,8 +11,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.Collections; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -32,9 +29,6 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -110,7 +104,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(1) - public void copyOpenorgsMergeRelTest() throws Exception { + void copyOpenorgsMergeRelTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -143,7 +137,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(2) - public void createOrgsDedupRecordTest() throws Exception { + void createOrgsDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -176,7 +170,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(3) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -216,7 +210,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(4) - public void copyRelationsNoOpenorgsTest() throws Exception { + void copyRelationsNoOpenorgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -239,7 +233,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(5) - public void propagateRelationsTest() throws Exception { + void propagateRelationsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 31de8d951..2efbe260a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -124,7 +124,7 @@ public class SparkStatsTest implements Serializable { } @Test - public void createBlockStatsTest() throws Exception { + void createBlockStatsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 1759180d2..7348a3bd2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.oa.dedup.jpath; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.platform.commons.util.StringUtils; import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; -public class JsonPathTest { +class JsonPathTest { String json = "{\t\"dataInfo\":{\t\t\"invisible\":false,\t\t\"inferred\":false,\t\t\"deletedbyinference\":false,\t\t\"trust\":\"0.810000002384185791\",\t\t\"inferenceprovenance\":\"\",\t\t\"provenanceaction\":{\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t}\t},\t\"lastupdatetimestamp\":1584960968152,\t\"id\":\"20|corda__h2020::9faf23721249f26ac2c16eb857ea1fb9\",\t\"originalId\":[\t\t\"corda__h2020::927957582\"\t],\t\"collectedfrom\":[\t\t{\t\t\t\"key\":\"openaire____::corda_h2020\",\t\t\t\"value\":\"CORDA - COmmon Research DAta Warehouse - Horizon 2020\",\t\t\t\"dataInfo\":null\t\t}\t],\t\"pid\":[\t],\t\"dateofcollection\":\"2016-06-05\",\t\"dateoftransformation\":\"2019-11-19\",\t\"extraInfo\":[\t],\t\"oaiprovenance\":null,\t\"legalshortname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"legalname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"alternativeNames\":[\t],\t\"websiteurl\":{\t\t\"value\":\"http://www.comentor.se\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"logourl\":null,\t\"eclegalbody\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"eclegalperson\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnonprofit\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecresearchorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"echighereducation\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganizationeurinterests\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecenterprise\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecsmevalidated\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnutscode\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"country\":null}"; DedupConfig conf = DedupConfig @@ -283,15 +285,18 @@ public class JsonPathTest { + "}"); @Test - public void testJPath() throws Exception { + void testJPath() { MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(conf, json); + Assertions.assertNotNull(d); + Assertions.assertTrue(StringUtils.isNotBlank(d.getIdentifier())); + System.out.println("d = " + d); } @Test - public void testNull() throws Exception { + void testNull() { final Object p = null; System.out.println((String) p); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java index ee6136b58..c6c207727 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref; import java.io.ByteArrayOutputStream; +import java.util.Objects; import java.util.Optional; import java.util.zip.Inflater; @@ -22,9 +23,11 @@ public class CrossrefImporter { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - CrossrefImporter.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/import_from_es.json"))); + Objects + .requireNonNull( + CrossrefImporter.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/import_from_es.json")))); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java index dcebbbcac..6d6a4ca78 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref; +import java.io.IOException; import java.util.Iterator; import java.util.List; @@ -11,8 +12,6 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.jayway.jsonpath.JsonPath; @@ -57,8 +56,8 @@ public class ESClient implements Iterator { try (CloseableHttpResponse response = client.execute(httpPost)) { return IOUtils.toString(response.getEntity().getContent()); } - } catch (Throwable e) { - throw new RuntimeException("Error on executing request ", e); + } catch (IOException e) { + throw new IllegalStateException("Error on executing request ", e); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index feb540fcd..f725b3222 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -29,8 +29,10 @@ public class ActivitiesDecompressor { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; - public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) - throws Exception { + private ActivitiesDecompressor() { + } + + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws IOException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); @@ -44,7 +46,7 @@ public class ActivitiesDecompressor { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,8 +54,7 @@ public class ActivitiesDecompressor { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int doiFound = 0; int errorFromOrcidFound = 0; @@ -79,11 +80,11 @@ public class ActivitiesDecompressor { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes()); + WorkData workData = XMLRecordParser.VTDParseWorkData(builder.toString().getBytes()); if (workData != null) { if (workData.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -113,7 +114,7 @@ public class ActivitiesDecompressor { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } @@ -177,11 +178,11 @@ public class ActivitiesDecompressor { counter++; BufferedReader br = new BufferedReader(new InputStreamReader(tais)); String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - String xml = buffer.toString(); + String xml = builder.toString(); String[] filenameParts = filename.split("/"); final Text key = new Text( XMLRecordParser diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java index 4de4a0266..99587b16a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -10,7 +11,6 @@ import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; public class ExtractXMLActivitiesData extends OrcidDSManager { private String outputWorksPath; @@ -22,11 +22,11 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { extractXMLActivitiesData.extractWorks(); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLActivitiesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); parser.parseArgument(args); @@ -43,7 +43,6 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { private void extractWorks() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java index 5c2a35229..4121f3391 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java @@ -5,12 +5,13 @@ import java.io.IOException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; +import eu.dnetlib.dhp.parser.utility.VtdException; public class ExtractXMLSummariesData extends OrcidDSManager { @@ -27,7 +28,7 @@ public class ExtractXMLSummariesData extends OrcidDSManager { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLSummariesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); parser.parseArgument(args); @@ -42,9 +43,8 @@ public class ExtractXMLSummariesData extends OrcidDSManager { Log.info("Output Authors Data: " + outputAuthorsPath); } - public void extractAuthors() throws Exception { + public void extractAuthors() throws IOException, VtdException, ParseException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 3b4033450..7e4869fdd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -22,9 +22,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { orcidAuthorsDOIsDataGen.generateAuthorsDOIsData(); } - public void generateAuthorsDOIsData() throws Exception { + public void generateAuthorsDOIsData() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 73a4bfd05..0b4ef279d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -25,9 +25,8 @@ public class OrcidDSManager { orcidDSManager.generateAuthors(); } - public void generateAuthors() throws Exception { + public void generateAuthors() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8cf070213..2b8e42bf6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.FileNotFoundException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -64,7 +65,7 @@ public class SparkDownloadOrcidAuthors { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -82,7 +83,7 @@ public class SparkDownloadOrcidAuthors { JavaPairRDD lamdaFileRDD = sc .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); - logger.info("Data retrieved: " + lamdaFileRDDCount); + logger.info("Data retrieved: {}", lamdaFileRDDCount); Function, Boolean> isModifiedAfterFilter = data -> { String orcidId = data._1().toString(); @@ -95,7 +96,7 @@ public class SparkDownloadOrcidAuthors { return false; }; - Function, Tuple2> downloadRecordFunction = data -> { + Function, Tuple2> downloadRecordFn = data -> { String orcidId = data._1().toString(); String lastModifiedDate = data._2().toString(); final DownloadedRecordData downloaded = new DownloadedRecordData(); @@ -118,14 +119,19 @@ public class SparkDownloadOrcidAuthors { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); } @@ -145,33 +151,41 @@ public class SparkDownloadOrcidAuthors { logger.info("Start execution ..."); JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); long authorsModifiedCount = authorsModifiedRDD.count(); - logger.info("Authors modified count: " + authorsModifiedCount); + logger.info("Authors modified count: {}", authorsModifiedCount); logger.info("Start downloading ..."); - authorsModifiedRDD - .repartition(100) - .map(downloadRecordFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) - .saveAsNewAPIHadoopFile( - workingPath.concat(outputPath), - Text.class, - Text.class, - SequenceFileOutputFormat.class, - sc.hadoopConfiguration()); - logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); - logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP404Acc: " + errorHTTP404Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + final JavaPairRDD pairRDD = authorsModifiedRDD + .repartition(100) + .map(downloadRecordFn) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); + + saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); + + logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); + logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP404Acc: {}", errorHTTP404Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } + private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, + JavaPairRDD pairRDD) { + pairRDD + .saveAsNewAPIHadoopFile( + workingPath.concat(outputPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + sc.hadoopConfiguration()); + } + public static boolean isModified(String orcidId, String modifiedDate, String lastUpdate) { Date modifiedDateDt; Date lastUpdateDt; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 59de7ca80..cab538783 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; -import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; @@ -61,7 +58,7 @@ public class SparkDownloadOrcidWorks { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); final String token = parser.get("token"); final String hdfsServerUri = parser.get("hdfsServerUri"); @@ -169,20 +166,25 @@ public class SparkDownloadOrcidWorks { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); logger .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); + "Downloading {} status code: {}", orcidId, + response.getStatusLine().getStatusCode()); } return downloaded.toTuple2(); } @@ -199,24 +201,24 @@ public class SparkDownloadOrcidWorks { .flatMap(retrieveWorkUrlFunction) .repartition(100) .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); - logger.info("updatedAuthorsAcc: " + updatedAuthorsAcc.value().toString()); - logger.info("parsedAuthorsAcc: " + parsedAuthorsAcc.value().toString()); - logger.info("parsedWorksAcc: " + parsedWorksAcc.value().toString()); - logger.info("modifiedWorksAcc: " + modifiedWorksAcc.value().toString()); - logger.info("maxModifiedWorksLimitAcc: " + maxModifiedWorksLimitAcc.value().toString()); - logger.info("errorCodeFoundAcc: " + errorCodeFoundAcc.value().toString()); - logger.info("errorLoadingJsonFoundAcc: " + errorLoadingJsonFoundAcc.value().toString()); - logger.info("errorLoadingXMLFoundAcc: " + errorLoadingXMLFoundAcc.value().toString()); - logger.info("errorParsingXMLFoundAcc: " + errorParsingXMLFoundAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); + logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); + logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); + logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); + logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); + logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); + logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); + logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); + logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java index 178d07608..d2fed61ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ -3,7 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.*; +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.URI; import java.util.Arrays; import java.util.List; @@ -15,7 +16,6 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; @@ -28,10 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; public class SparkGenLastModifiedSeq { - private static String hdfsServerUri; - private static String workingPath; - private static String outputPath; - private static String lambdaFileName; public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -45,12 +41,14 @@ public class SparkGenLastModifiedSeq { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - hdfsServerUri = parser.get("hdfsServerUri"); - workingPath = parser.get("workingPath"); - outputPath = parser.get("outputPath"); - lambdaFileName = parser.get("lambdaFileName"); - String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); - String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; + + final String hdfsServerUri = parser.get("hdfsServerUri"); + final String workingPath = parser.get("workingPath"); + final String outputPath = parser.get("outputPath"); + final String lambdaFileName = parser.get("lambdaFileName"); + + final String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + final String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; SparkConf sparkConf = new SparkConf(); runWithSparkSession( @@ -64,7 +62,7 @@ public class SparkGenLastModifiedSeq { .concat(workingPath) .concat(outputPath)); Path hdfsreadpath = new Path(lambdaFileUri); - Configuration conf = new Configuration(); + Configuration conf = spark.sparkContext().hadoopConfiguration(); conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index 7d9f39d05..3ecb4f5e3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -53,13 +52,13 @@ public class SparkGenerateDoiAuthorList { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath"); - logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath); + logger.info("outputDoiAuthorListPath: {}", outputDoiAuthorListPath); final String authorsPath = parser.get("authorsPath"); - logger.info("authorsPath: ", authorsPath); + logger.info("authorsPath: {}", authorsPath); final String xmlWorksPath = parser.get("xmlWorksPath"); - logger.info("xmlWorksPath: ", xmlWorksPath); + logger.info("xmlWorksPath: {}", xmlWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -128,8 +127,7 @@ public class SparkGenerateDoiAuthorList { .concat( d1.stream(), d2.stream()); - List mergedAuthors = mergedStream.collect(Collectors.toList()); - return mergedAuthors; + return mergedStream.collect(Collectors.toList()); } if (d1 != null) { return d1; @@ -170,14 +168,6 @@ public class SparkGenerateDoiAuthorList { return authorData; } - private static WorkData loadWorkFromJson(Text orcidId, Text json) { - WorkData workData = new WorkData(); - workData.setOid(orcidId.toString()); - JsonElement jElement = new JsonParser().parse(json.toString()); - workData.setDoi(getJsonValue(jElement, "doi")); - return workData; - } - private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { JsonElement name = null; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java index 51326c610..1727f1825 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static org.apache.spark.sql.functions.*; -import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -17,8 +16,10 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,11 +36,12 @@ import scala.Tuple2; public class SparkUpdateOrcidAuthors { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +55,6 @@ public class SparkUpdateOrcidAuthors { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -87,7 +88,7 @@ public class SparkUpdateOrcidAuthors { String jsonData = data._2().toString(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { @@ -135,7 +136,7 @@ public class SparkUpdateOrcidAuthors { .col("authorData.oid") .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), "full_outer") - .map(value -> { + .map((MapFunction, AuthorSummary>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -183,7 +184,8 @@ public class SparkUpdateOrcidAuthors { .groupBy("authorData.oid") .agg(array_max(collect_list("downloadDate"))) .map( - row -> new Tuple2<>(row.get(0).toString(), row.get(1).toString()), + (MapFunction>) row -> new Tuple2<>(row.get(0).toString(), + row.get(1).toString()), Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .toJavaRDD() .collect(); @@ -214,25 +216,24 @@ public class SparkUpdateOrcidAuthors { .dropDuplicates("downloadDate", "authorData"); cleanedDS .toJavaRDD() - .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); long cleanedDSCount = cleanedDS.count(); - logger.info("report_oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); - logger.info("report_newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); - logger.info("report_updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); - logger.info("report_errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); - logger.info("report_errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); - logger.info("report_errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - logger.info("report_merged_count: " + mergedCount); - logger.info("report_cleaned_count: " + cleanedDSCount); + logger.info("report_oldAuthorsFoundAcc: {}", oldAuthorsFoundAcc.value()); + logger.info("report_newAuthorsFoundAcc: {}", newAuthorsFoundAcc.value()); + logger.info("report_updatedAuthorsFoundAcc: {}", updatedAuthorsFoundAcc.value()); + logger.info("report_errorCodeFoundAcc: {}", errorCodeAuthorsFoundAcc.value()); + logger.info("report_errorLoadingJsonFoundAcc: {}", errorLoadingAuthorsJsonFoundAcc.value()); + logger.info("report_errorParsingXMLFoundAcc: {}", errorParsingAuthorsXMLFoundAcc.value()); + logger.info("report_merged_count: {}", mergedCount); + logger.info("report_cleaned_count: {}", cleanedDSCount); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java index fa17e97e3..aad202ff6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java @@ -3,17 +3,16 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -26,20 +25,19 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; import scala.Tuple2; public class SparkUpdateOrcidDatasets { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +51,6 @@ public class SparkUpdateOrcidDatasets { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -62,25 +59,6 @@ public class SparkUpdateOrcidDatasets { spark -> { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - LongAccumulator oldAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("old_authors_found"); - LongAccumulator updatedAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("updated_authors_found"); - LongAccumulator newAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("new_authors_found"); - LongAccumulator errorCodeAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("error_code_authors_found"); - LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_authors_json_found"); - LongAccumulator errorParsingAuthorsXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_parsing_authors_xml_found"); - LongAccumulator oldWorksFoundAcc = spark .sparkContext() .longAccumulator("old_works_found"); @@ -100,125 +78,11 @@ public class SparkUpdateOrcidDatasets { .sparkContext() .longAccumulator("error_parsing_works_xml_found"); -// JavaPairRDD xmlSummariesRDD = sc -// .sequenceFile(workingPath.concat("xml/authors/xml_authors.seq"), Text.class, Text.class); -// xmlSummariesRDD -// .map(seq -> { -// AuthorSummary authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(seq._2().toString().getBytes()); -// authorSummary -// .setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return authorSummary; -// }) -// .filter(authorSummary -> authorSummary != null) -// .map(authorSummary -> JsonWriter.create(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/authors"), GzipCodec.class); -// -// JavaPairRDD xmlWorksRDD = sc -// .sequenceFile(workingPath.concat("xml/works/*"), Text.class, Text.class); -// -// xmlWorksRDD -// .map(seq -> { -// WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(seq._2().toString().getBytes()); -// Work work = new Work(); -// work.setWorkDetail(workDetail); -// work.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return work; -// }) -// .filter(work -> work != null) -// .map(work -> JsonWriter.create(work)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/works"), GzipCodec.class); - -// Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { -// AuthorSummary authorSummary = new AuthorSummary(); -// String orcidId = data._1().toString(); -// String jsonData = data._2().toString(); -// JsonElement jElement = new JsonParser().parse(jsonData); -// String statusCode = getJsonValue(jElement, "statusCode"); -// String downloadDate = getJsonValue(jElement, "lastModifiedDate"); -// if (statusCode.equals("200")) { -// String compressedData = getJsonValue(jElement, "compressedData"); -// if (StringUtils.isEmpty(compressedData)) { -// errorLoadingAuthorsJsonFoundAcc.add(1); -// } else { -// String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); -// try { -// authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(xmlAuthor.getBytes()); -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// authorSummary.setBase64CompressData(compressedData); -// return authorSummary; -// } catch (Exception e) { -// logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); -// errorParsingAuthorsXMLFoundAcc.add(1); -// } -// } -// } else { -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// errorCodeAuthorsFoundAcc.add(1); -// } -// return authorSummary; -// }; -// -// Dataset downloadedAuthorSummaryDS = spark -// .createDataset( -// sc -// .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) -// .map(retrieveAuthorSummaryFunction) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// Dataset currentAuthorSummaryDS = spark -// .createDataset( -// sc -// .textFile(workingPath.concat("orcid_dataset/authors/*")) -// .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// currentAuthorSummaryDS -// .joinWith( -// downloadedAuthorSummaryDS, -// currentAuthorSummaryDS -// .col("authorData.oid") -// .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), -// "full_outer") -// .map(value -> { -// Optional opCurrent = Optional.ofNullable(value._1()); -// Optional opDownloaded = Optional.ofNullable(value._2()); -// if (!opCurrent.isPresent()) { -// newAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// if (!opDownloaded.isPresent()) { -// oldAuthorsFoundAcc.add(1); -// return opCurrent.get(); -// } -// if (opCurrent.isPresent() && opDownloaded.isPresent()) { -// updatedAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// return null; -// }, -// Encoders.bean(AuthorSummary.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); -// -// logger.info("oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); -// logger.info("newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); -// logger.info("updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); -// logger.info("errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); -// logger.info("errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); -// logger.info("errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - - Function retrieveWorkFunction = jsonData -> { + final Function retrieveWorkFunction = jsonData -> { Work work = new Work(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate("2020-11-18 00:00:05.644768"); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -247,9 +111,7 @@ public class SparkUpdateOrcidDatasets { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -271,7 +133,7 @@ public class SparkUpdateOrcidDatasets { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -291,23 +153,22 @@ public class SparkUpdateOrcidDatasets { Encoders.bean(Work.class)) .filter(Objects::nonNull) .toJavaRDD() - .map(work -> OBJECT_MAPPER.writeValueAsString(work)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java index 5ebbc01ed..64523941d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -29,14 +30,16 @@ import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import scala.Tuple2; public class SparkUpdateOrcidWorks { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -83,7 +86,6 @@ public class SparkUpdateOrcidWorks { JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate(Long.toString(System.currentTimeMillis())); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -112,9 +114,7 @@ public class SparkUpdateOrcidWorks { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -136,7 +136,7 @@ public class SparkUpdateOrcidWorks { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -159,12 +159,12 @@ public class SparkUpdateOrcidWorks { .map(work -> OBJECT_MAPPER.writeValueAsString(work)) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); String lastModifiedDateFromLambdaFile = HDFSUtil .readFromTextFile(hdfsServerUri, workingPath, "last_modified_date_from_lambda_file.txt"); @@ -175,8 +175,7 @@ public class SparkUpdateOrcidWorks { private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index c85b5b691..af6def227 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -20,6 +20,9 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + +import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; @@ -28,22 +31,23 @@ public class SummariesDecompressor { private static final int MAX_XML_RECORDS_PARSED = -1; - public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) - throws Exception { - String uri = inputUri; - FileSystem fs = FileSystem.get(URI.create(uri), conf); - Path inputPath = new Path(uri); + private SummariesDecompressor() { + } + + public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) throws IOException { + FileSystem fs = FileSystem.get(URI.create(inputUri), conf); + Path inputPath = new Path(inputUri); CompressionCodecFactory factory = new CompressionCodecFactory(conf); CompressionCodec codec = factory.getCodec(inputPath); if (codec == null) { - System.err.println("No codec found for " + uri); + System.err.println("No codec found for " + inputUri); System.exit(1); } - CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + CompressionCodecFactory.removeSuffix(inputUri, codec.getDefaultExtension()); InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarSummaries(fs, conf, gzipInputStream, outputPath); + parseTarSummaries(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,7 +56,7 @@ public class SummariesDecompressor { } private static void parseTarSummaries( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int nameFound = 0; int surnameFound = 0; @@ -163,7 +167,7 @@ public class SummariesDecompressor { } public static void extractXML(Configuration conf, String inputUri, Path outputPath) - throws Exception { + throws IOException, VtdException, ParseException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index a2342f7b4..9eb73b240 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; public class JsonHelper { + private JsonHelper() { + } + public static String createOidWork(WorkDetail workData) { return new Gson().toJson(workData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java index e1a913476..bbd2e1f7e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java @@ -2,10 +2,8 @@ package eu.dnetlib.doiboost.orcid.util; import java.io.*; -import java.net.URI; import java.nio.charset.StandardCharsets; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,42 +12,39 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - -import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors; - public class HDFSUtil { static Logger logger = LoggerFactory.getLogger(HDFSUtil.class); + private HDFSUtil() { + } + private static FileSystem getFileSystem(String hdfsServerUri) throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsServerUri); - FileSystem fileSystem = FileSystem.get(conf); - return fileSystem; + return FileSystem.get(conf); } public static String readFromTextFile(String hdfsServerUri, String workingPath, String path) throws IOException { FileSystem fileSystem = getFileSystem(hdfsServerUri); Path toReadPath = new Path(workingPath.concat(path)); if (!fileSystem.exists(toReadPath)) { - throw new RuntimeException("File not exist: " + path); + throw new IOException("File not exist: " + path); } - logger.info("Last_update_path " + toReadPath); + logger.info("Last_update_path {}", toReadPath); FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath)); - BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); - StringBuffer sb = new StringBuffer(); - try { + try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) { + StringBuilder sb = new StringBuilder(); + String line; while ((line = br.readLine()) != null) { sb.append(line); } - } finally { - br.close(); + + String buffer = sb.toString(); + logger.info("Last_update: {}", buffer); + return buffer; } - String buffer = sb.toString(); - logger.info("Last_update: " + buffer); - return buffer; } public static void writeToTextFile(String hdfsServerUri, String workingPath, String path, String text) @@ -60,8 +55,8 @@ public class HDFSUtil { fileSystem.delete(toWritePath, true); } FSDataOutputStream os = fileSystem.create(toWritePath); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); - br.write(text); - br.close(); + try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8))) { + br.write(text); + } } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index 52e076105..e8745aa96 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,6 @@ package eu.dnetlib.doiboost.orcid.xml; -import java.io.IOException; import java.util.*; import org.apache.commons.lang3.StringUtils; @@ -38,6 +37,9 @@ public class XMLRecordParser { private static final String NS_ERROR = "error"; + private XMLRecordParser() { + } + public static AuthorData VTDParseAuthorData(byte[] bytes) throws VtdException, ParseException { final VTDGen vg = new VTDGen(); @@ -90,46 +92,6 @@ public class XMLRecordParser { authorData.setOtherNames(otherNames); } -// final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); -// if (StringUtils.isNoneBlank(creationMethod)) { -// authorData.setCreationMethod(creationMethod); -// } -// -// final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); -// if (StringUtils.isNoneBlank(completionDate)) { -// authorData.setCompletionDate(completionDate); -// } -// -// final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); -// if (StringUtils.isNoneBlank(submissionDate)) { -// authorData.setSubmissionDate(submissionDate); -// } -// -// final String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); -// if (StringUtils.isNoneBlank(claimed)) { -// authorData.setClaimed(Boolean.parseBoolean(claimed)); -// } -// -// final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); -// if (StringUtils.isNoneBlank(verifiedEmail)) { -// authorData.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); -// } -// -// final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); -// if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { -// authorData.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); -// } -// -// final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); -// if (StringUtils.isNoneBlank(deactivationDate)) { -// authorData.setDeactivationDate(deactivationDate); -// } -// -// final String lastModifiedDate = VtdUtilityParser -// .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); -// if (StringUtils.isNoneBlank(lastModifiedDate)) { -// authorData.setLastModifiedDate(lastModifiedDate); -// } return authorData; } @@ -207,7 +169,7 @@ public class XMLRecordParser { } public static Map retrieveWorkIdLastModifiedDate(byte[] bytes) - throws ParseException, XPathParseException, NavException, XPathEvalException, IOException { + throws ParseException, XPathParseException, NavException, XPathEvalException { final VTDGen vg = new VTDGen(); vg.setDoc(bytes); vg.parse(true); @@ -251,15 +213,15 @@ public class XMLRecordParser { ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); - AuthorData authorData = retrieveAuthorData(ap, vn, bytes); - AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn, bytes); + AuthorData authorData = retrieveAuthorData(ap, vn); + AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn); AuthorSummary authorSummary = new AuthorSummary(); authorSummary.setAuthorData(authorData); authorSummary.setAuthorHistory(authorHistory); return authorSummary; } - private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn) throws VtdException { AuthorData authorData = new AuthorData(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); @@ -300,7 +262,7 @@ public class XMLRecordParser { return authorData; } - private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn) throws VtdException { AuthorHistory authorHistory = new AuthorHistory(); final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 124a1b9ef..ddbf71bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -33,6 +33,9 @@ public class ActivitiesDumpReader { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + private ActivitiesDumpReader() { + } + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws Exception { String uri = inputUri; @@ -48,7 +51,7 @@ public class ActivitiesDumpReader { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -56,8 +59,7 @@ public class ActivitiesDumpReader { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int noDoiFound = 0; int errorFromOrcidFound = 0; @@ -73,7 +75,7 @@ public class ActivitiesDumpReader { SequenceFile.Writer.valueClass(Text.class))) { while ((entry = tais.getNextTarEntry()) != null) { String filename = entry.getName(); - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); try { if (entry.isDirectory() || !filename.contains("works")) { @@ -83,12 +85,12 @@ public class ActivitiesDumpReader { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - buffer = new StringBuffer(); + builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } WorkDetail workDetail = XMLRecordParserNoDoi - .VTDParseWorkData(buffer.toString().getBytes()); + .VTDParseWorkData(builder.toString().getBytes()); if (workDetail != null) { if (workDetail.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -123,7 +125,7 @@ public class ActivitiesDumpReader { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 4a64124d1..5c23a33a8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; @@ -16,7 +16,6 @@ import eu.dnetlib.doiboost.orcid.OrcidDSManager; * This job generates one sequence file, the key is an orcid identifier and the * value is an orcid publication in json format */ - public class GenOrcidAuthorWork extends OrcidDSManager { private String activitiesFileNameTarGz; @@ -30,13 +29,12 @@ public class GenOrcidAuthorWork extends OrcidDSManager { public void generateAuthorsDOIsData() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputWorksPath)); ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 1d47808ef..db3b14923 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; +import java.io.FileNotFoundException; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -14,21 +14,16 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; -import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; @@ -75,7 +70,7 @@ public class SparkGenEnrichedOrcidWorks { spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } final String dateOfCollection = lastUpdate.substring(0, 10); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -86,10 +81,10 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/authors/*")) .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) .filter(authorSummary -> authorSummary.getAuthorData() != null) - .map(authorSummary -> authorSummary.getAuthorData()) + .map(AuthorSummary::getAuthorData) .rdd(), Encoders.bean(AuthorData.class)); - logger.info("Authors data loaded: " + authorDataset.count()); + logger.info("Authors data loaded: {}", authorDataset.count()); Dataset workDataset = spark .createDataset( @@ -97,7 +92,7 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/works/*")) .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) .filter(work -> work.getWorkDetail() != null) - .map(work -> work.getWorkDetail()) + .map(Work::getWorkDetail) .filter(work -> work.getErrorCode() == null) .filter( work -> work @@ -107,7 +102,7 @@ public class SparkGenEnrichedOrcidWorks { .noneMatch(e -> e.getType().equalsIgnoreCase("doi"))) .rdd(), Encoders.bean(WorkDetail.class)); - logger.info("Works data loaded: " + workDataset.count()); + logger.info("Works data loaded: {}", workDataset.count()); final LongAccumulator warnNotFoundContributors = spark .sparkContext() @@ -122,7 +117,7 @@ public class SparkGenEnrichedOrcidWorks { WorkDetail w = value._1; AuthorData a = value._2; if (w.getContributors() == null - || (w.getContributors() != null && w.getContributors().size() == 0)) { + || (w.getContributors() != null && w.getContributors().isEmpty())) { Contributor c = new Contributor(); c.setName(a.getName()); c.setSurname(a.getSurname()); @@ -163,7 +158,6 @@ public class SparkGenEnrichedOrcidWorks { final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, - errorsGeneric, errorsInvalidTitle, errorsNotFoundAuthors, errorsInvalidType, @@ -172,13 +166,10 @@ public class SparkGenEnrichedOrcidWorks { titleNotProvidedAcc, noUrlAcc, dateOfCollection); + JavaRDD oafPublicationRDD = enrichedWorksRDD - .map( - e -> { - return (Publication) publicationToOaf - .generatePublicationActionsFromJson(e._2()); - }) - .filter(p -> p != null); + .map(e -> (Publication) publicationToOaf.generatePublicationActionsFromJson(e._2())) + .filter(Objects::nonNull); sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); @@ -186,7 +177,7 @@ public class SparkGenEnrichedOrcidWorks { .mapToPair( p -> new Tuple2<>(p.getClass().toString(), OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( outputEnrichedWorksPath, Text.class, @@ -194,17 +185,17 @@ public class SparkGenEnrichedOrcidWorks { SequenceFileOutputFormat.class, sc.hadoopConfiguration()); - logger.info("parsedPublications: " + parsedPublications.value().toString()); - logger.info("enrichedPublications: " + enrichedPublications.value().toString()); - logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); - logger.info("errorsGeneric: " + errorsGeneric.value().toString()); - logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); - logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); - logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); - logger.info("otherTypeFound: " + otherTypeFound.value().toString()); - logger.info("deactivatedAcc: " + deactivatedAcc.value().toString()); - logger.info("titleNotProvidedAcc: " + titleNotProvidedAcc.value().toString()); - logger.info("noUrlAcc: " + noUrlAcc.value().toString()); + logger.info("parsedPublications: {}", parsedPublications.value()); + logger.info("enrichedPublications: {}", enrichedPublications.value()); + logger.info("warnNotFoundContributors: {}", warnNotFoundContributors.value()); + logger.info("errorsGeneric: {}", errorsGeneric.value()); + logger.info("errorsInvalidTitle: {}", errorsInvalidTitle.value()); + logger.info("errorsNotFoundAuthors: {}", errorsNotFoundAuthors.value()); + logger.info("errorsInvalidType: {}", errorsInvalidType.value()); + logger.info("otherTypeFound: {}", otherTypeFound.value()); + logger.info("deactivatedAcc: {}", deactivatedAcc.value()); + logger.info("titleNotProvidedAcc: {}", titleNotProvidedAcc.value()); + logger.info("noUrlAcc: {}", noUrlAcc.value()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 23e9dd884..33f3b3bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -18,6 +18,9 @@ public class JsonWriter { public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); + private JsonWriter() { + } + public static String create(AuthorData authorData) throws JsonProcessingException { return OBJECT_MAPPER.writeValueAsString(authorData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 215753899..f92040c24 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,6 +18,7 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -26,21 +27,19 @@ import eu.dnetlib.doiboost.orcidnodoi.util.Pair; /** * This class converts an orcid publication from json format to oaf */ - public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); - public final static String orcidPREFIX = "orcid_______"; + public static final String orcidPREFIX = "orcid_______"; public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; + public static final String SEPARATOR = IdentifierFactory.ID_SEPARATOR; public static final String DEACTIVATED_NAME = "Given Names Deactivated"; public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; private String dateOfCollection = ""; private final LongAccumulator parsedPublications; private final LongAccumulator enrichedPublications; - private final LongAccumulator errorsGeneric; private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final LongAccumulator errorsInvalidType; @@ -52,7 +51,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf( LongAccumulator parsedPublications, LongAccumulator enrichedPublications, - LongAccumulator errorsGeneric, LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, LongAccumulator errorsInvalidType, @@ -63,7 +61,6 @@ public class PublicationToOaf implements Serializable { String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; - this.errorsGeneric = errorsGeneric; this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; @@ -77,7 +74,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf() { this.parsedPublications = null; this.enrichedPublications = null; - this.errorsGeneric = null; this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; @@ -88,19 +84,8 @@ public class PublicationToOaf implements Serializable { this.dateOfCollection = null; } - private static final Map> datasources = new HashMap>() { - - { - put( - ModelConstants.ORCID, - new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + ModelConstants.ORCID)); - - } - }; - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname private static final Map> externalIds = new HashMap>() { - { put("ark".toLowerCase(), new Pair<>("ark", "ark")); put("arxiv".toLowerCase(), new Pair<>("arXiv", "arXiv")); @@ -208,10 +193,8 @@ public class PublicationToOaf implements Serializable { .setTitle( titles .stream() - .map(t -> { - return mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null); - }) - .filter(s -> s != null) + .map(t -> mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); // Adding identifier final String id = getStringValue(rootElement, "id"); @@ -226,7 +209,7 @@ public class PublicationToOaf implements Serializable { publication.setId(sourceId); // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + settingRelevantDate(rootElement, publication, "issued", true); // Adding collectedfrom publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); @@ -243,7 +226,7 @@ public class PublicationToOaf implements Serializable { Map publicationType = typologiesMapping.get(type); if ((publicationType == null || publicationType.isEmpty()) && errorsInvalidType != null) { errorsInvalidType.add(1); - logger.error("publication_type_not_found: " + type); + logger.error("publication_type_not_found: {}", type); return null; } @@ -307,7 +290,7 @@ public class PublicationToOaf implements Serializable { // Adding authors final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { + if (authors != null && !authors.isEmpty()) { if (authors.stream().filter(a -> { return ((Objects.nonNull(a.getName()) && a.getName().equals(DEACTIVATED_NAME)) || (Objects.nonNull(a.getSurname()) && a.getSurname().equals(DEACTIVATED_SURNAME))); @@ -322,8 +305,7 @@ public class PublicationToOaf implements Serializable { } else { if (authors == null) { Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String json = gson.toJson(rootElement); - throw new RuntimeException("not_valid_authors: " + json); + throw new RuntimeException("not_valid_authors: " + gson.toJson(rootElement)); } else { if (errorsNotFoundAuthors != null) { errorsNotFoundAuthors.add(1); @@ -434,7 +416,6 @@ public class PublicationToOaf implements Serializable { private void settingRelevantDate(final JsonObject rootElement, final Publication publication, - final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) { @@ -450,10 +431,8 @@ public class PublicationToOaf implements Serializable { Arrays .asList(pubDate) .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .filter(s -> s != null) + .map(r -> mapStructuredProperty(r, q, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); } } @@ -498,7 +477,7 @@ public class PublicationToOaf implements Serializable { final String type = getStringValue(rootElement, "type"); if (!typologiesMapping.containsKey(type)) { - logger.error("unknowntype_" + type); + logger.error("unknowntype_{}", type); if (errorsInvalidType != null) { errorsInvalidType.add(1); } @@ -550,7 +529,7 @@ public class PublicationToOaf implements Serializable { } private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | StringUtils.isBlank(value)) { + if (value == null || StringUtils.isBlank(value)) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index fff753ff3..e69b496b7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -1,26 +1,14 @@ package eu.dnetlib.doiboost.orcidnodoi.similarity; -import java.io.IOException; import java.text.Normalizer; import java.util.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaroWinklerSimilarity; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.ximpleware.NavException; -import com.ximpleware.ParseException; -import com.ximpleware.XPathEvalException; -import com.ximpleware.XPathParseException; - -import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.dhp.schema.orcid.Contributor; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for searching from a list of publication contributors a @@ -29,18 +17,16 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; * the match is found (if exist) author informations are used to enrich the * matched contribuotr inside contributors list */ - public class AuthorMatcher { - private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); - public static final Double threshold = 0.8; + public static final Double THRESHOLD = 0.8; - public static void match(AuthorData author, List contributors) - throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + private AuthorMatcher() { + } + public static void match(AuthorData author, List contributors) { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); - Contributor contributor = null; contributors .stream() .filter(c -> !StringUtils.isBlank(c.getCreditName())) @@ -62,8 +48,8 @@ public class AuthorMatcher { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -73,14 +59,14 @@ public class AuthorMatcher { } else if (matchCounters.get(0) > 1) { Optional optCon = contributors .stream() - .filter(c -> c.isSimpleMatch()) + .filter(Contributor::isSimpleMatch) .filter(c -> !StringUtils.isBlank(c.getCreditName())) .map(c -> { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -92,7 +78,7 @@ public class AuthorMatcher { } public static boolean simpleMatchOnOtherNames(String name, List otherNames) { - if (otherNames == null || (otherNames != null && otherNames.isEmpty())) { + if (otherNames == null || otherNames.isEmpty()) { return false; } return otherNames.stream().filter(o -> simpleMatch(name, o)).count() > 0; @@ -132,8 +118,7 @@ public class AuthorMatcher { } public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { - Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - return score; + return similarityJaroWinkler(nameA, surnameA, nameB, surnameB); } private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { @@ -179,7 +164,7 @@ public class AuthorMatcher { public static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { contributors .stream() - .filter(c -> c.isBestMatch()) + .filter(Contributor::isBestMatch) .forEach(c -> { c.setName(author.getName()); c.setSurname(author.getSurname()); @@ -206,9 +191,4 @@ public class AuthorMatcher { } } - private static String toJson(WorkDetail work) { - GsonBuilder builder = new GsonBuilder(); - Gson gson = builder.create(); - return gson.toJson(work); - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index 29791bbbd..b4c12eed3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -5,9 +5,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; @@ -20,21 +17,10 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for parsing xml data with vtd parser */ - public class XMLRecordParserNoDoi { - private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; private static final String NS_COMMON = "common"; - private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person"; - private static final String NS_PERSON = "person"; - private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details"; - private static final String NS_DETAILS = "personal-details"; - private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name"; - private static final String NS_OTHER = "other-name"; - private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; - private static final String NS_RECORD = "record"; private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; private static final String NS_WORK = "work"; @@ -42,6 +28,9 @@ public class XMLRecordParserNoDoi { private static final String NS_ERROR = "error"; + private XMLRecordParserNoDoi() { + } + public static WorkDetail VTDParseWorkData(byte[] bytes) throws VtdException, ParseException, XPathParseException, NavException, XPathEvalException { @@ -100,16 +89,16 @@ public class XMLRecordParserNoDoi { workData.setUrls(urls); } - workData.setPublicationDates(getPublicationDates(vg, vn, ap)); - workData.setExtIds(getExternalIds(vg, vn, ap)); - workData.setContributors(getContributors(vg, vn, ap)); + workData.setPublicationDates(getPublicationDates(vn, ap)); + workData.setExtIds(getExternalIds(vn, ap)); + workData.setContributors(getContributors(vn, ap)); return workData; } - private static List getPublicationDates(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getPublicationDates(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List publicationDates = new ArrayList(); + List publicationDates = new ArrayList<>(); int yearIndex = 0; ap.selectXPath("//common:publication-date/common:year"); while (ap.evalXPath() != -1) { @@ -142,9 +131,9 @@ public class XMLRecordParserNoDoi { return publicationDates; } - private static List getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getExternalIds(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List extIds = new ArrayList(); + List extIds = new ArrayList<>(); int typeIndex = 0; ap.selectXPath("//common:external-id/common:external-id-type"); while (ap.evalXPath() != -1) { @@ -177,12 +166,12 @@ public class XMLRecordParserNoDoi { if (typeIndex == valueIndex) { return extIds; } - return new ArrayList(); + return new ArrayList<>(); } - private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getContributors(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List contributors = new ArrayList(); + List contributors = new ArrayList<>(); ap.selectXPath("//work:contributors/work:contributor"); while (ap.evalXPath() != -1) { Contributor contributor = new Contributor(); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 2b241ed5f..82577e0d8 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.orcid; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.*; @@ -57,7 +58,7 @@ public class OrcidClientTest { // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - public void downloadTest() throws Exception { + void downloadTest() throws Exception { final String orcid = "0000-0001-7291-3210"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = testPath + "/downloaded_record_".concat(orcid).concat(".xml"); @@ -159,18 +160,18 @@ public class OrcidClientTest { } @Test - private void testReadBase64CompressedRecord() throws Exception { + void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertTrue(recordFromSeqFile.equals(downloadedRecord)); + assertEquals(recordFromSeqFile, downloadedRecord); } @Test @Disabled - public void lambdaFileReaderTest() throws Exception { + void lambdaFileReaderTest() throws Exception { String last_update = "2021-01-12 00:00:06.685137"; TarArchiveInputStream input = new TarArchiveInputStream( new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); @@ -187,7 +188,7 @@ public class OrcidClientTest { while ((line = br.readLine()) != null) { String[] values = line.split(","); List recordInfo = Arrays.asList(values); - assertTrue(recordInfo.size() == 4); + assertEquals(4, recordInfo.size()); String orcid = recordInfo.get(0); String modifiedDate = recordInfo.get(3); rowNum++; @@ -264,7 +265,7 @@ public class OrcidClientTest { } @Test - public void downloadWorkTest() throws Exception { + void downloadWorkTest() throws Exception { String orcid = "0000-0003-0015-1952"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml"); @@ -274,7 +275,7 @@ public class OrcidClientTest { } @Test - public void downloadRecordTest() throws Exception { + void downloadRecordTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml"); @@ -284,7 +285,7 @@ public class OrcidClientTest { } @Test - public void downloadWorksTest() throws Exception { + void downloadWorksTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS); String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml"); @@ -294,7 +295,7 @@ public class OrcidClientTest { } @Test - public void downloadSingleWorkTest() throws Exception { + void downloadSingleWorkTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml"); @@ -304,7 +305,7 @@ public class OrcidClientTest { } @Test - public void cleanAuthorListTest() throws Exception { + void cleanAuthorListTest() throws Exception { AuthorData a1 = new AuthorData(); a1.setOid("1"); a1.setName("n1"); @@ -333,7 +334,7 @@ public class OrcidClientTest { @Test @Ignore - public void testUpdatedRecord() throws Exception { + void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); @@ -342,7 +343,7 @@ public class OrcidClientTest { @Test @Ignore - private void testUpdatedWork() throws Exception { + void testUpdatedWork() throws Exception { final String base64CompressedWork = "H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA=="; final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 235db52d4..78760fa96 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -24,10 +24,7 @@ import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class XMLRecordParserTest { - private static final String NS_WORK = "work"; - private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; - private static final String NS_COMMON = "common"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static Path testPath; @@ -38,12 +35,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidAuthorDataXMLParser() throws Exception { + void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getName()); @@ -54,12 +49,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidXMLErrorRecordParser() throws Exception { + void testOrcidXMLErrorRecordParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getErrorCode()); @@ -67,14 +60,12 @@ public class XMLRecordParserTest { } @Test - public void testOrcidWorkDataXMLParser() throws Exception { + void testOrcidWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); - XMLRecordParser p = new XMLRecordParser(); - WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes()); assertNotNull(workData); assertNotNull(workData.getOid()); @@ -83,7 +74,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidOtherNamesXMLParser() throws Exception { + void testOrcidOtherNamesXMLParser() throws Exception { String xml = IOUtils .toString( @@ -91,30 +82,13 @@ public class XMLRecordParserTest { AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getOtherNames()); - assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus")); + assertEquals("Andrew C. Porteus", authorData.getOtherNames().get(0)); String jsonData = JsonWriter.create(authorData); assertNotNull(jsonData); } -// @Test -// private void testWorkIdLastModifiedDateXMLParser() throws Exception { -// String xml = IOUtils -// .toString( -// this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); -// Map workIdLastModifiedDate = XMLRecordParser.retrieveWorkIdLastModifiedDate(xml.getBytes()); -// workIdLastModifiedDate.forEach((k, v) -> { -// try { -// OrcidClientTest -// .logToFile( -// k + " " + v + " isModified after " + SparkDownloadOrcidWorks.lastUpdateValue + ": " -// + SparkDownloadOrcidWorks.isModified("0000-0001-5004-5918", v)); -// } catch (IOException e) { -// } -// }); -// } - @Test - public void testAuthorSummaryXMLParser() throws Exception { + void testAuthorSummaryXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); @@ -124,7 +98,7 @@ public class XMLRecordParserTest { } @Test - public void testWorkDataXMLParser() throws Exception { + void testWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 01e26dcb4..1dbb37fca 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -11,36 +11,37 @@ import org.slf4j.LoggerFactory; import com.google.gson.JsonElement; import com.google.gson.JsonParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import jdk.nashorn.internal.ir.annotations.Ignore; -public class PublicationToOafTest { +class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test @Ignore - private void convertOafPublicationTest() throws Exception { + void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( PublicationToOafTest.class.getResourceAsStream("publication.json")); JsonElement j = new JsonParser().parse(jsonPublication); - logger.info("json publication loaded: " + j.toString()); + logger.info("json publication loaded: {}", j.toString()); PublicationToOaf publicationToOaf = new PublicationToOaf(); Publication oafPublication = (Publication) publicationToOaf .generatePublicationActionsFromDump(j.getAsJsonObject()); assertNotNull(oafPublication.getId()); assertNotNull(oafPublication.getOriginalId()); - assertEquals(oafPublication.getOriginalId().get(0), "60153327"); - logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals("60153327", oafPublication.getOriginalId().get(0)); + logger.info("oafPublication.getId(): {}", oafPublication.getId()); assertEquals( - oafPublication.getTitle().get(0).getValue(), - "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp", + oafPublication.getTitle().get(0).getValue()); assertNotNull(oafPublication.getLastupdatetimestamp()); assertNotNull(oafPublication.getDateofcollection()); assertNotNull(oafPublication.getDateoftransformation()); - assertTrue(oafPublication.getAuthor().size() == 7); + assertEquals(7, oafPublication.getAuthor().size()); oafPublication.getAuthor().forEach(a -> { assertNotNull(a.getFullname()); assertNotNull(a.getRank()); @@ -64,15 +65,15 @@ public class PublicationToOafTest { if (oafPublication.getExternalReference() != null) { oafPublication.getExternalReference().forEach(e -> { assertNotNull(e.getRefidentifier()); - assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + assertEquals(ModelConstants.DNET_PID_TYPES, e.getQualifier().getSchemeid()); }); } assertNotNull(oafPublication.getInstance()); oafPublication.getInstance().forEach(i -> { assertNotNull(i.getInstancetype().getClassid()); - logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid()); assertNotNull(i.getInstancetype().getClassname()); - logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 54c2d6217..99ec656d5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -1,8 +1,7 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.util.*; @@ -25,7 +24,7 @@ import eu.dnetlib.dhp.schema.orcid.Contributor; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; -public class OrcidNoDoiTest { +class OrcidNoDoiTest { private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class); @@ -34,7 +33,7 @@ public class OrcidNoDoiTest { static String orcidIdA = "0000-0003-2760-1191"; @Test - public void readPublicationFieldsTest() + void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils @@ -44,10 +43,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -87,7 +82,7 @@ public class OrcidNoDoiTest { } @Test - public void authorDoubleMatchTest() throws Exception { + void authorDoubleMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -98,71 +93,49 @@ public class OrcidNoDoiTest { .toString( OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Abdel-Dayem K")); + assertEquals("Abdel-Dayem K", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 6); + assertEquals(6, workData.getContributors().size()); } @Test - public void readContributorsTest() + void readContributorsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils .toString( OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml")); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName())); - assertTrue(workData.getContributors().get(0).getSequence().equals("seq0")); - assertTrue(workData.getContributors().get(0).getRole().equals("role0")); - assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1")); + assertEquals("seq0", workData.getContributors().get(0).getSequence()); + assertEquals("role0", workData.getContributors().get(0).getRole()); + assertEquals("creditname1", workData.getContributors().get(1).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence())); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole())); - assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2")); - assertTrue(workData.getContributors().get(2).getSequence().equals("seq2")); + assertEquals("creditname2", workData.getContributors().get(2).getCreditName()); + assertEquals("seq2", workData.getContributors().get(2).getSequence()); assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole())); - assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3")); + assertEquals("creditname3", workData.getContributors().get(3).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence())); - assertTrue(workData.getContributors().get(3).getRole().equals("role3")); + assertEquals("role3", workData.getContributors().get(3).getRole()); assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName())); - assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); - assertTrue(workData.getContributors().get(4).getRole().equals("role4")); + assertEquals("seq4", workData.getContributors().get(4).getSequence()); + assertEquals("role4", workData.getContributors().get(4).getRole()); } @Test - public void authorSimpleMatchTest() throws Exception { + void authorSimpleMatchTest() throws Exception { String orcidWork = "activity_work_0000-0002-5982-8983.xml"; AuthorData author = new AuthorData(); author.setName("Parkhouse"); @@ -175,10 +148,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -188,20 +157,21 @@ public class OrcidNoDoiTest { assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Parkhouse, H.")); + assertEquals("Parkhouse, H.", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 2); + assertEquals(2, workData.getContributors().size()); Contributor c = workData.getContributors().get(0); - assertTrue(c.getOid().equals("0000-0002-5982-8983")); - assertTrue(c.getName().equals("Parkhouse")); - assertTrue(c.getSurname().equals("H.")); - assertTrue(c.getCreditName().equals("Parkhouse, H.")); + + assertEquals("0000-0002-5982-8983", c.getOid()); + assertEquals("Parkhouse", c.getName()); + assertEquals("H.", c.getSurname()); + assertEquals("Parkhouse, H.", c.getCreditName()); } @Test - public void match() { + void match() { AuthorData author = new AuthorData(); author.setName("Joe"); @@ -210,7 +180,6 @@ public class OrcidNoDoiTest { Contributor contributor = new Contributor(); contributor.setCreditName("Joe Dodge"); List contributors = Arrays.asList(contributor); - AuthorMatcher am = new AuthorMatcher(); int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); contributors @@ -225,12 +194,13 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters.get(0) == 1); + assertEquals(1, matchCounters.get(0)); AuthorMatcher.updateAuthorsSimpleMatch(contributors, author); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("Joe Dodge", contributors.get(0).getCreditName()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); AuthorData authorX = new AuthorData(); authorX.setName(nameA); @@ -259,7 +229,7 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters2.get(0) == 2); + assertEquals(2, matchCounters2.get(0)); assertTrue(contributorList.get(0).isSimpleMatch()); assertTrue(contributorList.get(1).isSimpleMatch()); @@ -271,7 +241,7 @@ public class OrcidNoDoiTest { c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= AuthorMatcher.threshold) + .filter(c -> c.getScore() >= AuthorMatcher.THRESHOLD) .max(Comparator.comparing(c -> c.getScore())); assertTrue(optCon.isPresent()); @@ -281,15 +251,16 @@ public class OrcidNoDoiTest { assertTrue(contributorList.get(0).isBestMatch()); assertTrue(!contributorList.get(1).isBestMatch()); AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX); - assertTrue(contributorList.get(0).getName().equals(nameA)); - assertTrue(contributorList.get(0).getSurname().equals(surnameA)); - assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); - assertTrue(contributorList.get(0).getOid().equals(orcidIdA)); + + assertEquals(nameA, contributorList.get(0).getName()); + assertEquals(surnameA, contributorList.get(0).getSurname()); + assertEquals("Abdel-Dayem Khai", contributorList.get(0).getCreditName()); + assertEquals(orcidIdA, contributorList.get(0).getOid()); assertTrue(StringUtils.isBlank(contributorList.get(1).getOid())); } @Test - public void authorBestMatchTest() throws Exception { + void authorBestMatchTest() throws Exception { String name = "Khairy"; String surname = "Abdel Dayem"; String orcidWork = "activity_work_0000-0003-2760-1191.xml"; @@ -304,10 +275,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -315,16 +282,17 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); List c = workData.getContributors(); - assertTrue(c.get(0).getName().equals(name)); - assertTrue(c.get(0).getSurname().equals(surname)); - assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); - assertTrue(c.get(0).getOid().equals(orcidIdA)); + + assertEquals(name, c.get(0).getName()); + assertEquals(surname, c.get(0).getSurname()); + assertEquals("Khair Abde Daye", c.get(0).getCreditName()); + assertEquals(orcidIdA, c.get(0).getOid()); } @Test - public void otherNamesMatchTest() + void otherNamesMatchTest() throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException { AuthorData author = new AuthorData(); @@ -341,8 +309,9 @@ public class OrcidNoDoiTest { contributor.setCreditName("XY"); List contributors = Arrays.asList(contributor); AuthorMatcher.match(author, contributors); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 692605b03..0b4a80b2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -14,12 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class PropagationConstant { + + private PropagationConstant() { + } + public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index 0f45d3beb..8e76b5778 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -5,16 +5,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.gson.Gson; /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { - private static final Log log = LogFactory.getLog(Community.class); - private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 844fe2962..5d92f5ab6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -2,15 +2,9 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.Gson; @@ -22,8 +16,6 @@ import eu.dnetlib.dhp.bulktag.criteria.Selection; /** Created by miriam on 02/08/2018. */ public class CommunityConfiguration implements Serializable { - private static final Log log = LogFactory.getLog(CommunityConfiguration.class); - private Map communities; // map subject -> communityid @@ -136,7 +128,7 @@ public class CommunityConfiguration implements Serializable { else return null; }) - .filter(st -> (st != null)) + .filter(Objects::nonNull) .collect(Collectors.toList()); } @@ -161,7 +153,7 @@ public class CommunityConfiguration implements Serializable { private List getContextIds(List> list) { if (list != null) { - return list.stream().map(p -> p.getFst()).collect(Collectors.toList()); + return list.stream().map(Pair::getFst).collect(Collectors.toList()); } return Lists.newArrayList(); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 749ed292f..822d35078 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -13,6 +13,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,11 +32,16 @@ public class CommunityConfigurationFactory { private static final VerbResolver resolver = VerbResolverFactory.newInstance(); - public static CommunityConfiguration newInstance(final String xml) throws DocumentException { + private CommunityConfigurationFactory() { + } + + public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException { log.debug(String.format("parsing community configuration from:\n%s", xml)); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Map communities = Maps.newHashMap(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index e0856ae8f..9002e718f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -13,9 +13,6 @@ public class Constraint implements Serializable { private String value; private Selection selection; - public Constraint() { - } - public String getVerb() { return verb; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index b56dfaaa3..0f6fab238 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -19,11 +19,8 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 02/08/2018. */ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); - // private ConstraintEncapsulator ce; - private List constraint; - public Constraints() { - } + private List constraint; public List getConstraint() { return constraint; @@ -44,13 +41,8 @@ public class Constraints implements Serializable { try { st.setSelection(resolver); - } catch (NoSuchMethodException e) { - log.error(e.getMessage()); - } catch (IllegalAccessException e) { - log.error(e.getMessage()); - } catch (InvocationTargetException e) { - log.error(e.getMessage()); - } catch (InstantiationException e) { + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException + | InstantiationException e) { log.error(e.getMessage()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index a9427b594..cb198dc43 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -19,10 +19,6 @@ public class Provider implements Serializable { private SelectionConstraints selectionConstraints; - public SelectionConstraints getSelCriteria() { - return selectionConstraints; - } - public SelectionConstraints getSelectionConstraints() { return selectionConstraints; } @@ -31,10 +27,6 @@ public class Provider implements Serializable { this.selectionConstraints = selectionConstraints; } - public void setSelCriteria(SelectionConstraints selCriteria) { - this.selectionConstraints = selCriteria; - } - public String getOpenaireId() { return openaireId; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 993a7ef77..89cf5beff 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community; import java.util.List; import org.dom4j.DocumentException; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -63,7 +64,7 @@ public class QueryInformationSystem { + " "; public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); final List res = isLookUp.quickSearchProfile(XQUERY); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c9ff26963..c8b1bc8fe 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -43,7 +43,6 @@ public class ResultTagger implements Serializable { param.put(key, jsonContext.read(params.get(key))); } catch (com.jayway.jsonpath.PathNotFoundException e) { param.put(key, new ArrayList<>()); - // throw e; } } return param; @@ -52,9 +51,6 @@ public class ResultTagger implements Serializable { public R enrichContextCriteria( final R result, final CommunityConfiguration conf, final Map criteria) { - // } - // public Result enrichContextCriteria(final Result result, final CommunityConfiguration - // conf, final Map criteria) { final Map> param = getParamMap(result, criteria); // Verify if the entity is deletedbyinference. In case verify if to clean the context list @@ -74,7 +70,7 @@ public class ResultTagger implements Serializable { result .getSubject() .stream() - .map(subject -> subject.getValue()) + .map(StructuredProperty::getValue) .filter(StringUtils::isNotBlank) .map(String::toLowerCase) .map(String::trim) @@ -90,15 +86,11 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { - if (Objects.nonNull(i.getCollectedfrom())) { - if (Objects.nonNull(i.getCollectedfrom().getKey())) { - tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); - } + if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { + tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); } - if (Objects.nonNull(i.getHostedby())) { - if (Objects.nonNull(i.getHostedby().getKey())) { - tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); - } + if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { + tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); } } @@ -149,52 +141,46 @@ public class ResultTagger implements Serializable { return result; } - result - .getContext() - .stream() - .map( - c -> { - if (communities.contains(c.getId())) { - Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); - List dataInfoList; - if (opt_dataInfoList.isPresent()) - dataInfoList = opt_dataInfoList.get(); - else { - dataInfoList = new ArrayList<>(); - c.setDataInfo(dataInfoList); - } - if (subjects.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_SUBJECT, - CLASS_NAME_BULKTAG_SUBJECT, - TAGGING_TRUST)); - if (datasources.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_DATASOURCE, - CLASS_NAME_BULKTAG_DATASOURCE, - TAGGING_TRUST)); - if (czenodo.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_CZENODO, - CLASS_NAME_BULKTAG_ZENODO, - TAGGING_TRUST)); - } - return c; - }) - .collect(Collectors.toList()); + result.getContext().forEach(c -> { + if (communities.contains(c.getId())) { + Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); + List dataInfoList; + if (opt_dataInfoList.isPresent()) + dataInfoList = opt_dataInfoList.get(); + else { + dataInfoList = new ArrayList<>(); + c.setDataInfo(dataInfoList); + } + if (subjects.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_SUBJECT, + CLASS_NAME_BULKTAG_SUBJECT, + TAGGING_TRUST)); + if (datasources.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_DATASOURCE, + CLASS_NAME_BULKTAG_DATASOURCE, + TAGGING_TRUST)); + if (czenodo.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_CZENODO, + CLASS_NAME_BULKTAG_ZENODO, + TAGGING_TRUST)); + } + }); communities .removeAll( - result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet())); + result.getContext().stream().map(Context::getId).collect(Collectors.toSet())); if (communities.isEmpty()) return result; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 71ff61d1b..c7dcce812 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -15,9 +15,6 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; public class SelectionConstraints implements Serializable { private List criteria; - public SelectionConstraints() { - } - public List getCriteria() { return criteria; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index 80d98bb1a..8274e26c9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.community; public class TaggingConstants { + private TaggingConstants() { + } + public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; public static final String CLASS_ID_SUBJECT = "community:subject"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index bc6b75fba..54c2dc9aa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -31,7 +31,6 @@ public class ZenodoCommunity implements Serializable { } private void setSelCriteria(String json) { - // Type collectionType = new TypeToken>(){}.getType(); selCriteria = new Gson().fromJson(json, SelectionConstraints.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java index ec9fb716d..9129e6e54 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.bulktag.criteria; -public interface Selection { +import java.io.Serializable; + +public interface Selection extends Serializable { boolean apply(String value); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 54176efb6..459ac1ba9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -7,16 +7,16 @@ import java.util.Map; import java.util.stream.Collectors; import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; import io.github.classgraph.ClassInfoList; import io.github.classgraph.ScanResult; public class VerbResolver implements Serializable { - private Map> map = null; // = new HashMap<>(); - private final ClassGraph classgraph = new ClassGraph(); + + private Map> map = null; public VerbResolver() { + final ClassGraph classgraph = new ClassGraph(); try (ScanResult scanResult = // Assign scanResult in try-with-resources classgraph // Create a new ClassGraph instance .verbose() // If you want to enable logging to stderr @@ -41,8 +41,6 @@ public class VerbResolver implements Serializable { .get(0) .getValue(), value -> (Class) value.loadClass())); - } catch (Exception e) { - e.printStackTrace(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java index 0bb801999..446ad5fbc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.criteria; public class VerbResolverFactory { + private VerbResolverFactory() { + } + public static VerbResolver newInstance() { return new VerbResolver(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 04a659a1c..ddc7f93f7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -6,11 +6,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,11 +17,11 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; /** * For the association of the country to the datasource The association is computed only for datasource of specific type @@ -77,16 +76,16 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = " d.id = '" + whitelist.get(0) + "'"; - for (int i = 1; i < whitelist.size(); i++) { - whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; - } - String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + final String whitelisted = whitelist + .stream() + .map(id -> " d.id = '" + id + "'") + .collect(Collectors.joining(" OR ")); - for (int i = 1; i < allowedtypes.size(); i++) { - allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; - } + final String allowed = allowedtypes + .stream() + .map(type -> " d.datasourcetype.classid = '" + type + "'") + .collect(Collectors.joining(" OR ")); Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8d0d6c48b..77f7288f6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -85,13 +85,12 @@ public class PrepareResultCountrySet { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); - // log.info("number of results: {}", result.count()); + createCfHbforResult(spark); Dataset datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class); datasource_country.createOrReplaceTempView("datasource_country"); - // log.info("datasource_country number : {}", datasource_country.count()); spark .sql(RESULT_COUNTRYSET_QUERY) @@ -102,7 +101,7 @@ public class PrepareResultCountrySet { ArrayList countryList = a.getCountrySet(); Set countryCodes = countryList .stream() - .map(country -> country.getClassid()) + .map(CountrySbs::getClassid) .collect(Collectors.toSet()); b .getCountrySet() @@ -119,10 +118,6 @@ public class PrepareResultCountrySet { }) .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .option("compression", "gzip") -// .mode(SaveMode.Append) -// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 97e0a33e1..4aa48583f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +19,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -28,8 +29,6 @@ public class SparkCountryPropagationJob { private static final Logger log = LoggerFactory.getLogger(SparkCountryPropagationJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -90,7 +89,6 @@ public class SparkCountryPropagationJob { boolean saveGraph) { if (saveGraph) { - // updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath); log.info("Reading Graph table from: {}", sourcePath); Dataset res = readPath(spark, sourcePath, resultClazz); @@ -122,7 +120,7 @@ public class SparkCountryPropagationJob { private static List merge(List c1, List c2) { HashSet countries = c1 .stream() - .map(c -> c.getClassid()) + .map(Qualifier::getClassid) .collect(Collectors.toCollection(HashSet::new)); return c2 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index b3ef3a112..95b870292 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -8,9 +8,7 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,7 +16,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index 2cea32e58..c60012a74 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -87,7 +87,7 @@ public class PrepareResultOrcidAssociationStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 436a53cbe..40faef7f3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -18,7 +18,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -141,34 +140,31 @@ public class SparkOrcidToResultFromSemRelJob { author_surname = author.getSurname(); } if (StringUtils.isNotEmpty(author_surname)) { + // have the same surname. Check the name if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - author_name = author.getName(); + .equalsIgnoreCase(author_surname.trim()) && StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author_name.trim())) { + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 27ff727fd..ac61e26f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -2,30 +2,25 @@ package eu.dnetlib.dhp.projecttoresult; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.PropagationConstant.getConstraintList; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareProjectResultsAssociation { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjectResultsAssociation.class); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index c57abb451..1ec521af1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -5,28 +5,27 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; public class SparkResultToProjectThroughSemRelJob { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(SparkResultToProjectThroughSemRelJob.class); public static void main(String[] args) throws Exception { @@ -95,26 +94,21 @@ public class SparkResultToProjectThroughSemRelJob { private static FlatMapFunction, Relation> mapRelationRn() { return value -> { - List new_relations = new ArrayList<>(); - ResultProjectSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - if (already_linked.isPresent()) { - already_linked - .get() - .getProjectSet() - .stream() - .forEach( - (p -> { - potential_update.getProjectSet().remove(p); - })); - } - String resId = potential_update.getResultId(); - potential_update + List newRelations = new ArrayList<>(); + ResultProjectSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + alreadyLinked + .ifPresent( + resultProjectSet -> resultProjectSet + .getProjectSet() + .forEach( + (p -> potentialUpdate.getProjectSet().remove(p)))); + String resId = potentialUpdate.getResultId(); + potentialUpdate .getProjectSet() - .stream() .forEach( projectId -> { - new_relations + newRelations .add( getRelation( resId, @@ -125,7 +119,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - new_relations + newRelations .add( getRelation( projectId, @@ -137,7 +131,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index a5f84cd2f..1a008797d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -10,11 +10,12 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -109,10 +110,6 @@ public class PrepareResultCommunitySet { }) .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .json(outputPath); } private static MapFunction mapResultCommunityFn( @@ -131,7 +128,7 @@ public class PrepareResultCommunitySet { communitySet.addAll(organizationMap.get(oId)); } } - if (communitySet.size() > 0) { + if (!communitySet.isEmpty()) { ResultCommunityList rcl = new ResultCommunityList(); rcl.setResultId(rId); ArrayList communityList = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 7201a30f6..cb80a90ca 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -4,7 +4,10 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +20,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToCommunityFromOrganizationJob { @@ -59,6 +61,7 @@ public class SparkResultToCommunityFromOrganizationJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -106,9 +109,12 @@ public class SparkResultToCommunityFromOrganizationJob { List contextList = ret .getContext() .stream() - .map(con -> con.getId()) + .map(Context::getId) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R res = (R) ret.getClass().newInstance(); + res.setId(ret.getId()); List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 09340369d..0ddb19a1a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -11,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +90,7 @@ public class PrepareResultCommunitySetStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4cb241ef2..3690351fb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -62,6 +62,7 @@ public class SparkResultToCommunityThroughSemRelJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); runWithSparkHiveSession( @@ -105,15 +106,15 @@ public class SparkResultToCommunityThroughSemRelJob { R ret = value._1(); Optional rcl = Optional.ofNullable(value._2()); if (rcl.isPresent()) { - Set context_set = new HashSet<>(); - ret.getContext().stream().forEach(c -> context_set.add(c.getId())); + Set contexts = new HashSet<>(); + ret.getContext().forEach(c -> contexts.add(c.getId())); List contextList = rcl .get() .getCommunityList() .stream() .map( c -> { - if (!context_set.contains(c)) { + if (!contexts.contains(c)) { Context newContext = new Context(); newContext.setId(c); newContext @@ -130,7 +131,10 @@ public class SparkResultToCommunityThroughSemRelJob { }) .filter(Objects::nonNull) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R r = (R) ret.getClass().newInstance(); + r.setId(ret.getId()); r.setContext(contextList); ret.mergeFrom(r); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 3cf36e572..0ef5ca181 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; @@ -91,13 +92,11 @@ public class PrepareResultInstRepoAssociation { private static void prepareDatasourceOrganization( SparkSession spark, String datasourceOrganizationPath, List blacklist) { - String blacklisted = ""; - if (blacklist.size() > 0) { - blacklisted = " AND id != '" + blacklist.get(0) + "'"; - for (int i = 1; i < blacklist.size(); i++) { - blacklisted += " AND id != '" + blacklist.get(i) + "'"; - } - } + + final String blacklisted = blacklist + .stream() + .map(s -> " AND id != '" + s + "'") + .collect(Collectors.joining()); String query = "SELECT source datasourceId, target organizationId " + "FROM ( SELECT id " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 01d7b85e4..63824f1a8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -4,23 +4,24 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToOrganizationFromIstRepoJob { @@ -84,7 +85,6 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - // removeOutputDir(spark, outputPath); if (saveGraph) { execPropagation( spark, @@ -105,9 +105,9 @@ public class SparkResultToOrganizationFromIstRepoJob { String outputPath, Class clazz) { - Dataset ds_org = readPath(spark, datasourceorganization, DatasourceOrganization.class); + Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, ds_org); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); @@ -125,26 +125,20 @@ public class SparkResultToOrganizationFromIstRepoJob { private static FlatMapFunction, Relation> createRelationFn() { return value -> { - List new_relations = new ArrayList<>(); - ResultOrganizationSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - List organization_list = potential_update.getOrganizationSet(); - if (already_linked.isPresent()) { - already_linked - .get() - .getOrganizationSet() - .stream() - .forEach( - rId -> { - organization_list.remove(rId); - }); - } - String resultId = potential_update.getResultId(); - organization_list - .stream() + List newRelations = new ArrayList<>(); + ResultOrganizationSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getOrganizationSet(); + alreadyLinked + .ifPresent( + resOrg -> resOrg + .getOrganizationSet() + .forEach(organizations::remove)); + String resultId = potentialUpdate.getResultId(); + organizations .forEach( orgId -> { - new_relations + newRelations .add( getRelation( orgId, @@ -155,7 +149,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - new_relations + newRelations .add( getRelation( resultId, @@ -167,7 +161,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } @@ -175,13 +169,13 @@ public class SparkResultToOrganizationFromIstRepoJob { SparkSession spark, String inputPath, Class resultClazz, - Dataset ds_org) { + Dataset dsOrg) { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); createCfHbforResult(spark); - ds_org.createOrReplaceTempView("rels"); + dsOrg.createOrReplaceTempView("rels"); return spark .sql(RESULT_ORGANIZATIONSET_QUERY) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 72e0a63fa..07299f01a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -90,7 +90,7 @@ public class BulkTagJobTest { } @Test - public void noUpdatesTest() throws Exception { + void noUpdatesTest() throws Exception { final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -128,7 +128,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectNoPreviousContextTest() throws Exception { + void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getPath(); @@ -224,7 +224,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") @@ -306,7 +306,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByDatasourceTest() throws Exception { + void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getPath(); @@ -378,7 +378,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByZenodoCommunityTest() throws Exception { + void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") @@ -500,7 +500,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceTest() throws Exception { + void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getPath(); @@ -628,7 +628,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { + void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { SparkBulkTagJob .main( @@ -724,7 +724,7 @@ public class BulkTagJobTest { } @Test - public void bulktagDatasourcewithConstraintsTest() throws Exception { + void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() .getResource( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index ca737b79f..861546adb 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.google.gson.Gson; @@ -20,12 +21,12 @@ import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ -public class CommunityConfigurationFactoryTest { +class CommunityConfigurationFactoryTest { private final VerbResolver resolver = new VerbResolver(); @Test - public void parseTest() throws DocumentException, IOException { + void parseTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() @@ -39,7 +40,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void applyVerb() + void applyVerb() throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, InstantiationException { Constraint sc = new Constraint(); @@ -52,7 +53,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void loadSelCriteriaTest() throws DocumentException, IOException { + void loadSelCriteriaTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 88ad43b6b..963ee5529 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; @@ -67,7 +67,7 @@ public class CountryPropagationJobTest { } @Test - public void testCountryPropagationSoftware() throws Exception { + void testCountryPropagationSoftware() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") .getPath(); @@ -105,7 +105,7 @@ public class CountryPropagationJobTest { Dataset countryExploded = verificationDs .flatMap( (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); + .map((MapFunction) Qualifier::getClassid, Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -119,10 +119,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassid = verificationDs .flatMap((FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -180,10 +179,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -241,10 +239,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 238375197..85db7ecf9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -65,7 +65,7 @@ public class OrcidPropagationJobTest { } @Test - public void noUpdateTest() throws Exception { + void noUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate") .getPath(); @@ -111,7 +111,7 @@ public class OrcidPropagationJobTest { } @Test - public void oneUpdateTest() throws Exception { + void oneUpdateTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { @@ -178,7 +178,7 @@ public class OrcidPropagationJobTest { } @Test - public void twoUpdatesTest() throws Exception { + void twoUpdatesTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index abed028e1..2fe1bc574 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -69,7 +69,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() .getResource( @@ -106,7 +106,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateTenTest() throws Exception { + void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") @@ -178,7 +178,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index d739516fc..4dd8b976c 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityFromOrganizationJob() throws Exception { + void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") .getPath(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 7709e00a8..0d5b12c80 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityThroughSemRelJob() throws Exception { + void testSparkResultToCommunityThroughSemRelJob() throws Exception { SparkResultToCommunityThroughSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index cfcccc5f0..fdcb10fb9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -67,7 +67,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -110,7 +110,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void UpdateNoMixTest() throws Exception { + void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -176,7 +176,7 @@ public class ResultToOrganizationJobTest { } @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") .getPath(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index f06059dd5..95aa749b2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; -public class CleaningRuleMap extends HashMap> implements Serializable { +public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { /** * Creates the mapping for the Oaf types subject to cleaning diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 9ba153ba5..5502fd391 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -16,7 +16,7 @@ public class OafCleaner implements Serializable { try { navigate(oaf, mapping); } catch (IllegalAccessException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } return oaf; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index 00ddcb5a8..0d2df11fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -1,22 +1,21 @@ package eu.dnetlib.dhp.oa.graph.dump; -import java.io.*; +import java.io.IOException; +import java.io.Serializable; import java.util.Optional; -import org.apache.commons.compress.archivers.ar.ArArchiveEntry; -import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.MakeTarArchive; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class MakeTar implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d118accba..dc740e811 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -8,6 +8,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -29,7 +30,7 @@ public class QueryInformationSystem { ""; public CommunityMap getCommunityMap() - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { return getMap(isLookUp.quickSearchProfile(XQUERY)); } @@ -42,12 +43,14 @@ public class QueryInformationSystem { this.isLookUp = isLookUpService; } - private CommunityMap getMap(List communityMap) throws DocumentException { + private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { final CommunityMap map = new CommunityMap(); for (String xml : communityMap) { final Document doc; - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index d30b3122c..500fe5986 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -93,7 +93,7 @@ public class ResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -109,20 +109,20 @@ public class ResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -132,7 +132,8 @@ public class ResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -219,11 +220,12 @@ public class ResultMapper implements Serializable { if (oInst.isPresent()) { if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((GraphResult) out) - .setInstance(oInst.get().stream().map(i -> getGraphInstance(i)).collect(Collectors.toList())); + .setInstance( + oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); } else { ((CommunityResult) out) .setInstance( - oInst.get().stream().map(i -> getCommunityInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(ResultMapper::getCommunityInstance).collect(Collectors.toList())); } } @@ -422,7 +424,7 @@ public class ResultMapper implements Serializable { Optional .ofNullable(i.getInstancetype()) .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6ac626518..f86f6918f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -31,25 +32,23 @@ public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); private final QueryInformationSystem queryInformationSystem; - private final Configuration conf; private final BufferedWriter writer; public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { - conf = new Configuration(); + final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath); + fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } public static void main(String[] args) throws Exception { @@ -74,10 +73,9 @@ public class SaveCommunityMap implements Serializable { final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); scm.saveCommunityMap(); - } - private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException { + private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException, SAXException { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap())); writer.close(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index fd8262544..ba26b708a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -17,9 +17,9 @@ import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class SendToZenodoHDFS implements Serializable { - private final static String NEW = "new"; // to be used for a brand new deposition in zenodo - private final static String VERSION = "version"; // to be used to upload a new version of a published deposition - private final static String UPDATE = "update"; // to upload content to an open deposition not published + private static final String NEW = "new"; // to be used for a brand new deposition in zenodo + private static final String VERSION = "version"; // to be used to upload a new version of a published deposition + private static final String UPDATE = "update"; // to upload content to an open deposition not published private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); @@ -85,7 +85,6 @@ public class SendToZenodoHDFS implements Serializable { Path p = fileStatus.getPath(); String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { - // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); String name = p_string.substring(p_string.lastIndexOf("/") + 1); log.info("Sending information for community: " + name); if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { @@ -102,9 +101,9 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } - if (publish) + if (publish) { zenodoApiClient.publish(); - + } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index 984e8b128..8e75e9d92 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -25,6 +25,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private Utils() { + } + public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } @@ -57,7 +60,7 @@ public class Utils { public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath)))); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); try { String line; while ((line = br.readLine()) != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 55f075e95..a9c0770a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -57,27 +57,22 @@ public class CommunitySplit implements Serializable { Dataset community_products = result .filter((FilterFunction) r -> containsCommunity(r, c)); - try { - community_products.first(); - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); - } catch (Exception e) { - - } - + community_products.first(); + community_products + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + c); } private static boolean containsCommunity(CommunityResult r, String c) { if (Optional.ofNullable(r.getContext()).isPresent()) { - return r + return !r .getContext() .stream() .filter(con -> con.getCode().equals(c)) .collect(Collectors.toList()) - .size() > 0; + .isEmpty(); } return false; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java index 63970d14b..7ab8a7540 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java @@ -2,9 +2,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import java.io.Serializable; -import java.util.*; - -import javax.swing.text.html.Option; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; @@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 2d43888b4..0f2b4c2ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -22,6 +22,7 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -127,11 +129,11 @@ public class SparkPrepareResultProject implements Serializable { op.getCode().getValue(), Optional .ofNullable(op.getAcronym()) - .map(a -> a.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getTitle()) - .map(v -> v.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getFundingtree()) @@ -140,7 +142,7 @@ public class SparkPrepareResultProject implements Serializable { .stream() .map(ft -> getFunder(ft.getValue())) .collect(Collectors.toList()); - if (tmp.size() > 0) { + if (!tmp.isEmpty()) { return tmp.get(0); } else { return null; @@ -161,30 +163,23 @@ public class SparkPrepareResultProject implements Serializable { } private static Funder getFunder(String fundingtree) { - // ["nsf_________::NSFNSFNational Science - // FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision - // of Computing and Communication FoundationsDivision of Computing and Communication - // Foundationsnsf_________::NSF::CISE/OADDirectorate for - // Computer & Information Science & EngineeringDirectorate for Computer & - // Information Science & - // Engineeringnsf:fundingStream"] - Funder f = new Funder(); + final Funder f = new Funder(); final Document doc; try { - doc = new SAXReader().read(new StringReader(fundingtree)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(fundingtree)); f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); for (Object o : doc.selectNodes("//funding_level_0")) { List node = ((Node) o).selectNodes("./name"); f.setFundingStream(((Node) node.get(0)).getText()); - } return f; - } catch (DocumentException e) { - e.printStackTrace(); + } catch (DocumentException | SAXException e) { + throw new IllegalArgumentException(e); } - return f; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 2b80b1d86..39ae32053 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -60,7 +60,7 @@ public class SparkUpdateProjectInfo implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz); + extend(spark, inputPath, outputPath, preparedInfoPath); }); } @@ -77,9 +77,7 @@ public class SparkUpdateProjectInfo implements Serializable { "left") .map((MapFunction, CommunityResult>) value -> { CommunityResult r = value._1(); - Optional.ofNullable(value._2()).ifPresent(rp -> { - r.setProjects(rp.getProjectsList()); - }); + Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; }, Encoders.bean(CommunityResult.class)) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java index eb546624e..57708a78d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java @@ -23,5 +23,4 @@ public class Constants implements Serializable { public static final String CONTEXT_NS_PREFIX = "context_____"; public static final String UNKNOWN = "UNKNOWN"; - // public static final String FUNDER_DS = "entityregistry::projects"; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index ccb84c713..120de9327 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import java.io.*; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.function.Consumer; import java.util.function.Function; -import org.apache.commons.crypto.utils.IoUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,15 +16,13 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and @@ -33,8 +33,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; public class CreateContextEntities implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; + private final transient Configuration conf; + private final transient BufferedWriter writer; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -88,7 +88,7 @@ public class CreateContextEntities implements Serializable { } public void execute(final Function producer, String isLookUpUrl) - throws Exception { + throws ISLookUpException { QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); @@ -101,10 +101,9 @@ public class CreateContextEntities implements Serializable { protected void writeEntity(final R r) { try { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - // log.info("writing context : {}", new Gson().toJson(r)); writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 102406315..10f2014d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -7,6 +7,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; import java.util.function.Function; @@ -31,20 +32,21 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; * and the project is not created because of a low coverage in the profiles of openaire ids related to projects */ public class CreateContextRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; - private final QueryInformationSystem queryInformationSystem; + private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); + private final transient Configuration conf; + private final transient BufferedWriter writer; + private final transient QueryInformationSystem queryInformationSystem; private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CreateContextRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + Objects + .requireNonNull( + CreateContextRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -70,10 +72,6 @@ public class CreateContextRelation implements Serializable { cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); log.info("Creating relations for projects... "); -// cce -// .execute( -// Process::getRelation, CONTEX_RELATION_PROJECT, -// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); cce.close(); @@ -107,7 +105,7 @@ public class CreateContextRelation implements Serializable { public void execute(final Function> producer, String category, String prefix) { - final Consumer consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c)); + final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); queryInformationSystem.getContextRelation(consumer, category, prefix); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 31d105b66..6b9f13277 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -6,8 +6,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -21,8 +19,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.*; * context entity and datasource/projects related to the context. */ public class Process implements Serializable { - private static final Logger log = LoggerFactory.getLogger(Process.class); + @SuppressWarnings("unchecked") public static R getEntity(ContextInfo ci) { try { ResearchInitiative ri; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index c33a693a5..0ed5de67c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -11,6 +11,7 @@ import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -84,8 +85,9 @@ public class QueryInformationSystem { final Document doc; try { - - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); cinfo.setId(root.attributeValue("id")); @@ -102,7 +104,7 @@ public class QueryInformationSystem { } consumer.accept(cinfo); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { e.printStackTrace(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 868fa89fe..4365e861f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -61,7 +61,7 @@ public class SparkOrganizationRelation implements Serializable { log.info("organization map : {}", new Gson().toJson(organizationMap)); final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {} ", communityMapPath); + log.info("communityMapPath: {}", communityMapPath); SparkConf conf = new SparkConf(); @@ -117,15 +117,12 @@ public class SparkOrganizationRelation implements Serializable { } })); - // if (relList.size() > 0) { spark .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - // } - } @NotNull diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 00f604b14..d8a1b5c21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.*; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -14,16 +16,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.api.zenodo.Community; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 1a28a21f4..2d2b04b5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -18,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -62,6 +61,7 @@ public class SparkResultLinkedToProject implements Serializable { final String relationPath = parser.get("relationPath"); log.info("relationPath: {}", relationPath); + @SuppressWarnings("unchecked") Class inputClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -95,9 +95,9 @@ public class SparkResultLinkedToProject implements Serializable { ._2() .getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction, R>) (k, it) -> { - return it.next()._2(); - }, Encoders.bean(inputClazz)) + .mapGroups( + (MapGroupsFunction, R>) (k, it) -> it.next()._2(), + Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java index 59bdb3914..f87c0eb7a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java @@ -74,24 +74,4 @@ public class DatasourceCompatibilityComparator implements Comparator return lClass.compareTo(rClass); } - /* - * CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY - * ['openaire-cris_1.1']) THEN 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT - * COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - * 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN - * 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - * 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - * 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - * 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - * 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) - * THEN 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END - */ } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java index 602213e58..ef419a042 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java @@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; import java.util.stream.Collectors; -import javax.xml.crypto.Data; - import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -16,7 +14,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -55,9 +51,11 @@ public class MergeGraphTableSparkJob { String jsonConfiguration = IOUtils .toString( - CleanGraphSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json")); + Objects + .requireNonNull( + MergeGraphTableSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -133,7 +131,7 @@ public class MergeGraphTableSparkJob { HashSet collectedFromNames = Optional .ofNullable(o.getCollectedfrom()) .map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet()); + .orElse(new HashSet<>()); return !collectedFromNames.contains("Datacite"); }) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 03c3eeb3c..9aa4e4c31 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -32,8 +32,6 @@ import org.dom4j.Document; import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -88,8 +86,6 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Map nsContext = new HashMap<>(); - private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesApplication.class); - static { nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); @@ -117,9 +113,6 @@ public abstract class AbstractMdRecordToOafMapper { } public List processMdRecord(final String xml) { - - // log.info("Processing record: " + xml); - try { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); @@ -134,7 +127,7 @@ public abstract class AbstractMdRecordToOafMapper { doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); if (collectedFrom == null) { - return null; + return Lists.newArrayList(); } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) @@ -142,7 +135,7 @@ public abstract class AbstractMdRecordToOafMapper { : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); if (hostedBy == null) { - return null; + return Lists.newArrayList(); } final DataInfo info = prepareDataInfo(doc, invisible); @@ -161,21 +154,17 @@ public abstract class AbstractMdRecordToOafMapper { protected String getResultType(final Document doc, final List instances) { final String type = doc.valueOf("//dr:CobjCategory/@type"); - if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + if (StringUtils.isBlank(type) && vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) .findFirst() - .map(s -> UNKNOWN.equalsIgnoreCase(s) ? "0000" : s) + .filter(s -> !UNKNOWN.equalsIgnoreCase(s)) .orElse("0000"); // Unknown return Optional .ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(q -> q.getClassid()) + .map(Qualifier::getClassid) .orElse("0000"); - /* - * .orElseThrow( () -> new IllegalArgumentException( String.format("'%s' not mapped in %s", instanceType, - * DNET_RESULT_TYPOLOGIES))); - */ } return type; @@ -185,7 +174,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; } @@ -498,7 +487,6 @@ public abstract class AbstractMdRecordToOafMapper { accessRight.setSchemename(qualifier.getSchemename()); // TODO set the OAStatus - // accessRight.setOaStatus(...); return accessRight; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index bbfb7429f..9027b49d7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -127,7 +127,7 @@ public class GenerateEntitiesApplication { .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) .map(k -> convertToListOaf(k._1(), k._2(), shouldHashId, vocs)) .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .flatMap(List::iterator)); } switch (mode) { @@ -135,7 +135,7 @@ public class GenerateEntitiesApplication { save( inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) - .reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2)) + .reduceByKey(OafMapperUtils::merge) .map(Tuple2::_2), targetPath); break; @@ -191,7 +191,7 @@ public class GenerateEntitiesApplication { case "otherresearchproduct": return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + throw new IllegalArgumentException("type not managed: " + type.toLowerCase()); } } @@ -199,9 +199,9 @@ public class GenerateEntitiesApplication { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (final Exception e) { - log.error("Error parsing object of class: " + clazz); + log.error("Error parsing object of class: {}", clazz); log.error(s); - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java index d5c310c1b..ee1b6a5da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java @@ -40,9 +40,11 @@ public class MergeClaimsApplication { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + Objects + .requireNonNull( + MergeClaimsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")))); parser.parseArgument(args); Boolean isSparkSessionManaged = Optional diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe..b9033702d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -9,10 +9,7 @@ import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -72,8 +69,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateDbEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))); + Objects + .requireNonNull( + MigrateDbEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")))); parser.parseArgument(args); @@ -87,7 +86,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("postgresPassword: xxx"); final String dbSchema = parser.get("dbschema"); - log.info("dbSchema {}: " + dbSchema); + log.info("dbSchema {}: ", dbSchema); final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); @@ -659,18 +658,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); - // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); - return Arrays.asList(r1); } catch (final Exception e) { throw new RuntimeException(e); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java index 1d4eca2c2..4110bd806 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -82,11 +83,11 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication public static void processPaths(final SparkSession spark, final String outputPath, final Set paths, - final String type) throws Exception { + final String type) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - log.info("Found " + paths.size() + " not empty mdstores"); + log.info("Found {} not empty mdstores", paths.size()); paths.forEach(log::info); final String[] validPaths = paths @@ -98,7 +99,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication spark .read() .parquet(validPaths) - .map((MapFunction) r -> enrichRecord(r), Encoders.STRING()) + .map((MapFunction) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING()) .toJavaRDD() .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml))) // .coalesce(1) @@ -120,7 +121,9 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication final String tranDate = dateFormat.format(new Date((Long) r.getAs("dateOfTransformation"))); try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']"); head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id")); head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate); @@ -135,8 +138,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication private static Set mdstorePaths(final String mdstoreManagerUrl, final String format, final String layout, - final String interpretation) - throws Exception { + final String interpretation) throws IOException { final String url = mdstoreManagerUrl + "/mdstores/"; final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java index 3f6afbeac..6dbab96cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java @@ -51,10 +51,10 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio public void execute(final String format, final String layout, final String interpretation) { final Map colls = mdstoreClient.validCollections(format, layout, interpretation); - log.info("Found " + colls.size() + " mdstores"); + log.info("Found {} mdstores", colls.size()); for (final Entry entry : colls.entrySet()) { - log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")"); + log.info("Processing mdstore {} (collection: {})", entry.getKey(), entry.getValue()); final String currentColl = entry.getValue(); for (final String xml : mdstoreClient.listRecords(currentColl)) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index d753cddeb..2b49a9dc1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -19,7 +19,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -56,8 +55,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); author.setPid(new ArrayList<>()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 7925a7826..02aab4f16 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -13,7 +13,6 @@ import org.dom4j.Node; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -88,11 +87,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); if (type.toLowerCase().startsWith(ORCID)) { - final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { res.add(structuredProperty(id, MAG_PID_TYPE, info)); @@ -388,7 +387,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final Set res = new HashSet(); + final Set res = new HashSet<>(); res .addAll( prepareListStructPropsWithValidQualifier( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index 5523863ff..edfd65299 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -4,12 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index a0ce4f5a6..5d32fe926 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -12,6 +12,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Oaf; @@ -34,7 +35,7 @@ public class AbstractMigrationApplication implements Closeable { this.writer = null; } - public AbstractMigrationApplication(final String hdfsPath) throws Exception { + public AbstractMigrationApplication(final String hdfsPath) throws IOException { log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath)); @@ -46,15 +47,14 @@ public class AbstractMigrationApplication implements Closeable { SequenceFile.Writer.valueClass(Text.class)); } - private Configuration getConf() throws IOException { - final Configuration conf = new Configuration(); + private Configuration getConf() { + return new Configuration(); /* * conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl", * org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", * org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser); * System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf); */ - return conf; } protected void emit(final String s, final String type) { @@ -62,16 +62,16 @@ public class AbstractMigrationApplication implements Closeable { key.set(counter.getAndIncrement() + ":" + type); value.set(s); writer.append(key, value); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalStateException(e); } } protected void emitOaf(final Oaf oaf) { try { emit(objectMapper.writeValueAsString(oaf), oaf.getClass().getSimpleName().toLowerCase()); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (JsonProcessingException e) { + throw new IllegalStateException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index 32f6e7abc..afaac04ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -68,7 +68,7 @@ public class GraphHiveImporterJobTest { } @Test - public void testImportGraphAsHiveDB() throws Exception { + void testImportGraphAsHiveDB() throws Exception { GraphHiveImporterJob .main( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index b196d1948..edcd72ab4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -50,7 +50,7 @@ public class GraphCleaningFunctionsTest { } @Test - public void testCleaning() throws Exception { + void testCleaning() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); @@ -166,10 +166,6 @@ public class GraphCleaningFunctionsTest { // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); - - /* - * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); - */ } private Stream getAuthorPidTypes(Result pub) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 803ae0416..697ec705f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -10,10 +10,10 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; @Disabled -public class GenerateJsonSchema { +class GenerateJsonSchema { @Test - public void generateSchema() { + void generateSchema() { SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) .with(Option.SCHEMA_VERSION_INDICATOR) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java index 51e4e1033..41b906e58 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java @@ -22,7 +22,7 @@ public class MakeTarTest { } @Test - public void testTar() throws IOException { + void testTar() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); fs diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index d5a9ba8dd..03eed7a45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.dump; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; @@ -15,7 +16,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -37,8 +37,6 @@ public class PrepareResultProjectJobTest { private static final Logger log = LoggerFactory .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class); - private static final HashMap map = new HashMap<>(); - @BeforeAll public static void beforeAll() throws IOException { workingDir = Files @@ -69,7 +67,7 @@ public class PrepareResultProjectJobTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match") @@ -90,12 +88,12 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertEquals(0, verificationDataset.count()); + assertEquals(0, verificationDataset.count()); } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one") @@ -116,12 +114,11 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 1); + assertEquals(1, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); verificationDataset.createOrReplaceTempView("table"); @@ -131,14 +128,14 @@ public class PrepareResultProjectJobTest { "from table " + "lateral view explode (projectsList) pl as projList"); - Assertions.assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); verificationDataset.show(false); } @Test - public void testMatch() throws Exception { + void testMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match") @@ -159,16 +156,14 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 2); + assertEquals(2, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); @@ -177,62 +172,54 @@ public class PrepareResultProjectJobTest { + "lateral view explode(projectsList) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(3, resultExplodedProvenance.count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals(3, resultExplodedProvenance.count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index c6666342a..98902b618 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -14,12 +14,13 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -66,7 +67,7 @@ public class QueryInformationSystemTest { private Map map; @BeforeEach - public void setUp() throws ISLookUpException, DocumentException { + public void setUp() throws ISLookUpException, DocumentException, SAXException { lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(isLookUpService); @@ -74,13 +75,13 @@ public class QueryInformationSystemTest { } @Test - public void testSize() throws ISLookUpException { + void testSize() throws ISLookUpException { Assertions.assertEquals(23, map.size()); } @Test - public void testContent() { + void testContent() { Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation")); Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 42ad5634a..2ea4bc91a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee0..a164593ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -68,7 +68,7 @@ public class UpdateProjectInfoTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 05dc423cb..8d06758a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -32,7 +32,7 @@ public class ZenodoUploadTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { CommunityMap communityMap = new CommunityMap(); communityMap.put("ni", "Neuroinformatics"); communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); @@ -86,7 +86,7 @@ public class ZenodoUploadTest { } @Test - public void testNewVersion() throws IOException, MissingConceptDoiException { + void testNewVersion() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -137,7 +137,7 @@ public class ZenodoUploadTest { } @Test - public void readCommunityMap() throws IOException { + void readCommunityMap() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); System.out .println( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 3ecbd1894..f881e6b30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -86,7 +86,7 @@ public class CreateEntityTest { } @Test - public void test1() throws ISLookUpException, IOException { + void test1() throws ISLookUpException, IOException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); @@ -144,7 +144,7 @@ public class CreateEntityTest { @Test @Disabled - public void test2() throws IOException, ISLookUpException { + void test2() throws IOException, ISLookUpException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); Path hdfsWritePath = new Path(workingDir + "/prova"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index b556fa2d6..69e550d45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -16,7 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.utils.DHPUtils; -public class CreateRelationTest { +class CreateRelationTest { List communityContext = Arrays .asList( @@ -473,7 +473,7 @@ public class CreateRelationTest { } @Test - public void test1() { + void test1() { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 3d42f124e..e43383ef4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -60,7 +60,7 @@ public class ExtractRelationFromEntityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java index 75d5a2673..eb5919fd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java @@ -4,13 +4,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder; -public class FunderParsingTest { +class FunderParsingTest { @Test - public void testFunderTwoLevels() throws DocumentException { + void testFunderTwoLevels() throws DocumentException { String funding_Stream = "nsf_________::NSFNSFNational Science " + @@ -37,7 +38,7 @@ public class FunderParsingTest { } @Test - public void testFunderThreeeLevels() throws DocumentException { + void testFunderThreeeLevels() throws DocumentException, SAXException { String funding_stream = "ec__________::EC" + "EC" + "European Commission" + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index d769aa138..049959704 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -17,7 +17,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -513,7 +513,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeEntity() throws ISLookUpException { + void testSizeEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -523,7 +523,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeRelation() throws ISLookUpException { + void testSizeRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -534,7 +534,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentRelation() throws ISLookUpException { + void testContentRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -572,7 +572,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentEntity() throws ISLookUpException { + void testContentEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java index ea2dc73ca..50a9f26b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java @@ -69,7 +69,7 @@ public class RelationFromOrganizationTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 6c5ebbab3..d1e3b3acc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -69,7 +69,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") @@ -102,7 +102,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 71bf5d942..8ac0c552f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -65,7 +65,7 @@ public class SplitPerFunderTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java index 0089811cf..2d28ee305 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java @@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Datasource; -public class MergeGraphTableSparkJobTest { +class MergeGraphTableSparkJobTest { private ObjectMapper mapper; @@ -25,7 +25,7 @@ public class MergeGraphTableSparkJobTest { } @Test - public void testMergeDatasources() throws IOException { + void testMergeDatasources() throws IOException { assertEquals( "openaire-cris_1.1", MergeGraphTableSparkJob diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index e0d202209..1e974dd69 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -24,7 +24,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class GenerateEntitiesApplicationTest { +class GenerateEntitiesApplicationTest { @Mock private ISLookUpService isLookUpService; @@ -44,7 +44,7 @@ public class GenerateEntitiesApplicationTest { } @Test - public void testMergeResult() throws IOException { + void testMergeResult() throws IOException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c431b4dd8..000dbfe25 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -21,7 +22,6 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class MappersTest { +class MappersTest { @Mock private ISLookUpService isLookUpService; @@ -50,7 +50,7 @@ public class MappersTest { @Test void testPublication() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -71,7 +71,7 @@ public class MappersTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertFalse(p.getDataInfo().getInvisible()); - assertTrue(p.getSource().size() == 1); + assertEquals(1, p.getSource().size()); assertTrue(StringUtils.isNotBlank(p.getDateofcollection())); assertTrue(StringUtils.isNotBlank(p.getDateoftransformation())); @@ -88,7 +88,7 @@ public class MappersTest { .getPid() .stream() .findFirst() - .get(); + .orElseThrow(() -> new IllegalStateException("missing author pid")); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -108,7 +108,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -141,17 +140,15 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); - // System.out.println(new ObjectMapper().writeValueAsString(p)); - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testPublication_PubMed() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record_pubmed.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -196,23 +193,22 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(p.getPid().size() > 0); - assertEquals(p.getPid().get(0).getValue(), "PMC1517292"); - assertEquals(p.getPid().get(0).getQualifier().getClassid(), "pmc"); + assertEquals("PMC1517292", p.getPid().get(0).getValue()); + assertEquals("pmc", p.getPid().get(0).getQualifier().getClassid()); assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); assertNotNull(p.getInstance().get(0).getPid()); - assertTrue(p.getInstance().get(0).getPid().size() == 2); + assertEquals(2, p.getInstance().get(0).getPid().size()); - assertTrue(p.getInstance().get(0).getAlternateIdentifier().size() == 1); + assertEquals(1, p.getInstance().get(0).getAlternateIdentifier().size()); assertEquals("doi", p.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); assertEquals("10.3897/oneeco.2.e13718", p.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); @@ -223,7 +219,7 @@ public class MappersTest { @Test void testPublicationInvisible() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); @@ -238,7 +234,7 @@ public class MappersTest { @Test void testDataset() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -281,12 +277,13 @@ public class MappersTest { .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - final StructuredProperty pid = author + final Optional oPid = author .get() .getPid() .stream() - .findFirst() - .get(); + .findFirst(); + assertTrue(oPid.isPresent()); + final StructuredProperty pid = oPid.get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -315,7 +312,6 @@ public class MappersTest { assertTrue(d.getInstance().size() > 0); d .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -345,13 +341,14 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testOdfBielefeld() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_bielefeld.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -389,7 +386,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -399,7 +395,8 @@ public class MappersTest { @Test void testOpentrial() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -444,7 +441,7 @@ public class MappersTest { assertEquals(1, d.getDescription().size()); assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); - assertTrue(d.getAuthor().size() == 1); + assertEquals(1, d.getAuthor().size()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); assertEquals("Kristian K.", d.getAuthor().get(0).getName()); assertEquals("Jensen", d.getAuthor().get(0).getSurname()); @@ -462,7 +459,7 @@ public class MappersTest { assertTrue(d.getContext().isEmpty()); assertNotNull(d.getInstance()); - assertTrue(d.getInstance().size() == 1); + assertEquals(1, d.getInstance().size()); final Instance i = d.getInstance().get(0); @@ -515,7 +512,7 @@ public class MappersTest { @Test void testSoftware() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -532,22 +529,15 @@ public class MappersTest { assertTrue(s.getInstance().size() > 0); } - // @Test - void testDataset_2() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset_2.xml")); - - final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); - - System.out.println("***************"); - System.out.println(new ObjectMapper().writeValueAsString(list)); - System.out.println("***************"); - } - @Test void testClaimDedup() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_dedup.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + assertNotNull(list); + assertFalse(list.isEmpty()); + System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println("***************"); @@ -555,7 +545,7 @@ public class MappersTest { @Test void testNakala() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -583,7 +573,7 @@ public class MappersTest { @Test void testEnermaps() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -608,7 +598,8 @@ public class MappersTest { @Test void testClaimFromCrossref() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -624,7 +615,7 @@ public class MappersTest { @Test void testODFRecord() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); @@ -638,7 +629,7 @@ public class MappersTest { @Test void testTextGrid() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -652,9 +643,9 @@ public class MappersTest { assertEquals(1, p.getAuthor().size()); assertEquals("OPEN", p.getBestaccessright().getClassid()); - assertTrue(p.getPid().size() == 1); + assertEquals(1, p.getPid().size()); assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid())); - assertTrue(PidType.handle.equals(PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()))); + assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid())); assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue()); assertEquals("dataset", p.getResulttype().getClassname()); assertEquals(1, p.getInstance().size()); @@ -672,7 +663,7 @@ public class MappersTest { @Test void testBologna() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -689,7 +680,7 @@ public class MappersTest { @Test void testJairo() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -702,7 +693,7 @@ public class MappersTest { assertNotNull(p.getTitle()); assertFalse(p.getTitle().isEmpty()); - assertTrue(p.getTitle().size() == 1); + assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); final Publication p_cleaned = cleanup(fixVocabularyNames(p)); @@ -713,7 +704,8 @@ public class MappersTest { @Test void testOdfFromHdfs() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -749,7 +741,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("UNKNOWN", i.getAccessright().getClassid()); @@ -768,13 +759,16 @@ public class MappersTest { private List vocs() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); + Objects + .requireNonNull(MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"))); } private List synonyms() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); + Objects + .requireNonNull( + MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"))); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb2..69943435a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -32,7 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) -public class MigrateDbEntitiesApplicationTest { +class MigrateDbEntitiesApplicationTest { private MigrateDbEntitiesApplication app; @@ -58,7 +58,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasource() throws Exception { + void testProcessDatasource() throws Exception { final List fields = prepareMocks("datasources_resultset_entry.json"); final List list = app.processDatasource(rs); @@ -81,7 +81,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProject() throws Exception { + void testProcessProject() throws Exception { final List fields = prepareMocks("projects_resultset_entry.json"); final List list = app.processProject(rs); @@ -99,7 +99,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessOrganization() throws Exception { + void testProcessOrganization() throws Exception { final List fields = prepareMocks("organizations_resultset_entry.json"); final List list = app.processOrganization(rs); @@ -126,7 +126,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasourceOrganization() throws Exception { + void testProcessDatasourceOrganization() throws Exception { final List fields = prepareMocks("datasourceorganization_resultset_entry.json"); final List list = app.processDatasourceOrganization(rs); @@ -143,7 +143,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProjectOrganization() throws Exception { + void testProcessProjectOrganization() throws Exception { final List fields = prepareMocks("projectorganization_resultset_entry.json"); final List list = app.processProjectOrganization(rs); @@ -162,7 +162,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_context() throws Exception { + void testProcessClaims_context() throws Exception { final List fields = prepareMocks("claimscontext_resultset_entry.json"); final List list = app.processClaims(rs); @@ -177,7 +177,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_rels() throws Exception { + void testProcessClaims_rels() throws Exception { final List fields = prepareMocks("claimsrel_resultset_entry.json"); final List list = app.processClaims(rs); @@ -208,9 +208,6 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(r1.getCollectedfrom().get(0).getKey()); assertValidId(r2.getCollectedfrom().get(0).getKey()); - - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); } private List prepareMocks(final String jsonFile) throws IOException, SQLException { @@ -273,7 +270,7 @@ public class MigrateDbEntitiesApplicationTest { final String[] values = ((List) tf.getValue()) .stream() .filter(Objects::nonNull) - .map(o -> o.toString()) + .map(Object::toString) .toArray(String[]::new); Mockito.when(arr.getArray()).thenReturn(values); @@ -334,6 +331,7 @@ public class MigrateDbEntitiesApplicationTest { return new Float(getValueAs(name, fields).toString()); } + @SuppressWarnings("unchecked") private T getValueAs(final String name, final List fields) { return fields .stream() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java index fb2c90e5c..3b9616de3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java @@ -26,8 +26,6 @@ import io.fares.junit.mongodb.MongoForAllExtension; @Disabled public class MigrateMongoMdstoresApplicationTest { - private static final Logger log = LoggerFactory.getLogger(MigrateMongoMdstoresApplicationTest.class); - public static final String COLL_NAME = "9eed8a4d-bb41-47c3-987f-9d06aee0dec0::1453898911558"; @RegisterExtension diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index 3fd365416..c8158c044 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -75,7 +75,7 @@ public class PatchRelationApplicationTest { } @Test - public void testPatchRelationApplication() throws Exception { + void testPatchRelationApplication() throws Exception { final String graphBasePath = workingDir.toString() + "/graphBasePath"; PatchRelationsApplication.main(new String[] { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java index 110fabf45..ec059ad73 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java @@ -9,7 +9,7 @@ import java.util.List; import org.junit.jupiter.api.Test; -public class ReflectionTest { +class ReflectionTest { private final Cleaner cleaner = new Cleaner(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bd..3301577ee 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -46,9 +46,11 @@ public class CreateRelatedEntitiesJob_phase1 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase1.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -146,7 +148,11 @@ public class CreateRelatedEntitiesJob_phase1 { Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { - final StructuredProperty title = result.getTitle().stream().findFirst().get(); + final StructuredProperty title = result + .getTitle() + .stream() + .findFirst() + .orElseThrow(() -> new IllegalStateException("missing title in " + entity.getId())); title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH)); re.setTitle(title); } @@ -196,7 +202,7 @@ public class CreateRelatedEntitiesJob_phase1 { List> f = p.getFundingtree(); if (!f.isEmpty()) { - re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); + re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList())); } break; } @@ -211,7 +217,7 @@ public class CreateRelatedEntitiesJob_phase1 { return Optional .ofNullable(f) .filter(Objects::nonNull) - .map(x -> x.getValue()) + .map(Field::getValue) .orElse(defaultValue); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index c013a2bf6..85fb4a6b2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -49,9 +49,11 @@ public class CreateRelatedEntitiesJob_phase2 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index b3f785492..ae899c3d8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,14 +131,14 @@ public class PrepareRelationsJob { Set relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) - .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); JavaRDD pruned = pruneRels( pruneRels( rels, - sourceMaxRelations, relPartitions, (Function) r -> r.getSource()), - targetMaxRelations, relPartitions, (Function) r -> r.getTarget()); + sourceMaxRelations, relPartitions, (Function) Relation::getSource), + targetMaxRelations, relPartitions, (Function) Relation::getTarget); spark .createDataset(pruned.rdd(), Encoders.bean(Relation.class)) .repartition(relPartitions) @@ -170,8 +170,8 @@ public class PrepareRelationsJob { .map( (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), Encoders.kryo(Relation.class)) - .filter((FilterFunction) rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter((FilterFunction) rel -> relationFilter.contains(rel.getRelClass()) == false) + .filter((FilterFunction) rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) Relation::getSource, Encoders.STRING()) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java index 28c1111d6..01d161b6b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.oa.provision; public class ProvisionConstants { + private ProvisionConstants() { + } + public static final String LAYOUT = "index"; public static final String INTERPRETATION = "openaire"; public static final String SEPARATOR = "-"; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 410aff5ba..0033978bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -17,7 +17,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.dhp.oa.provision.utils.ZkServers; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class SolrAdminApplication implements Closeable { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..b383f67ef 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -40,7 +39,7 @@ public class XmlConverterJob { private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class); - public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; + public static final String SCHEMA_LOCATION = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; public static void main(String[] args) throws Exception { @@ -95,7 +94,7 @@ public class XmlConverterJob { prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, + SCHEMA_LOCATION, otherDsTypeId); final List paths = HdfsSupport @@ -186,7 +185,7 @@ public class XmlConverterJob { accumulators .put( "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + sc.longAccumulator("organizationOrganization_dedup_merges")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index a321bdba9..e7dbdbd2b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -3,14 +3,12 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; -import javax.swing.text.html.Option; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.stream.StreamResult; @@ -163,7 +161,7 @@ public class XmlIndexingJob { case HDFS: spark .createDataset( - docs.map(s -> new SerializableSolrInputDocument(s)).rdd(), + docs.map(SerializableSolrInputDocument::new).rdd(), Encoders.kryo(SerializableSolrInputDocument.class)) .write() .mode(SaveMode.Overwrite) @@ -174,14 +172,13 @@ public class XmlIndexingJob { } } - protected static String toIndexRecord(Transformer tr, final String record) { + protected static String toIndexRecord(Transformer tr, final String xmlRecord) { final StreamResult res = new StreamResult(new StringWriter()); try { - tr.transform(new StreamSource(new StringReader(record)), res); + tr.transform(new StreamSource(new StringReader(xmlRecord)), res); return res.getWriter().toString(); - } catch (Throwable e) { - log.error("XPathException on record: \n {}", record, e); - throw new IllegalArgumentException(e); + } catch (TransformerException e) { + throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e); } } @@ -192,8 +189,6 @@ public class XmlIndexingJob { * @param xslt xslt for building the index record transformer * @param fields the list of fields * @return the javax.xml.transform.Transformer - * @throws ISLookUpException could happen - * @throws IOException could happen * @throws TransformerException could happen */ protected static String getLayoutTransformer(String format, String fields, String xslt) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java index 2eb9cf38b..0fb109fbb 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.provision.model; import java.io.Serializable; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index c09ed86e5..d4ee24c14 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -11,6 +11,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class ProvisionModelSupport { + private ProvisionModelSupport() { + } + public static Class[] getModelClasses() { List> modelClasses = Lists.newArrayList(ModelSupport.getOafModelClasses()); modelClasses diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java index e15ceff76..5c78d1826 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java @@ -34,7 +34,6 @@ public class RelatedEntity implements Serializable { private Qualifier datasourcetype; private Qualifier datasourcetypeui; private Qualifier openairecompatibility; - // private String aggregatortype; // organization private String legalname; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java index 7391569ed..a91050403 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -5,7 +5,6 @@ import java.util.Comparator; import java.util.Optional; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index ac418f2b9..bcaf40603 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -9,6 +9,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -23,7 +24,7 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; public static ContextMapper fromIS(final String isLookupUrl) - throws DocumentException, ISLookUpException { + throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); StringBuilder sb = new StringBuilder(""); Joiner.on("").appendTo(sb, isLookUp.quickSearchProfile(XQUERY)); @@ -31,10 +32,12 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } - public static ContextMapper fromXml(final String xml) throws DocumentException { + public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); for (Object o : doc.selectNodes("//entry")) { Node node = (Node) o; String id = node.valueOf("./@id"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index d2131ef28..3750d0173 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -8,16 +8,18 @@ import java.util.Set; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets + public static final Set authorPidTypes = Sets .newHashSet( ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); + private GraphMappingUtils() { + } + public static String removePrefix(final String s) { if (s.contains("|")) return substringAfter(s, "|"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java index 29a51cb29..8c7c61361 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java @@ -25,11 +25,9 @@ public class ISLookupClient { * * @param format the Metadata format name * @return the string representation of the list of fields to be indexed - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ - public String getLayoutSource(final String format) - throws ISLookUpDocumentNotFoundException, ISLookUpException { + public String getLayoutSource(final String format) throws ISLookUpException { return doLookup( String .format( @@ -41,7 +39,6 @@ public class ISLookupClient { * Method retrieves from the information system the openaireLayoutToRecordStylesheet * * @return the string representation of the XSLT contained in the transformation rule profile - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ public String getLayoutTransformer() throws ISLookUpException { @@ -78,9 +75,9 @@ public class ISLookupClient { } private String doLookup(String xquery) throws ISLookUpException { - log.info(String.format("running xquery: %s", xquery)); + log.info("running xquery: {}", xquery); final String res = getIsLookup().getResourceProfileByQuery(xquery); - log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ...")); + log.info("got response (100 chars): {} ...", StringUtils.left(res, 100)); return res; } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java deleted file mode 100644 index 9dbac1936..000000000 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.oa.provision.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class LicenseComparator implements Comparator { - - @Override - public int compare(Qualifier left, Qualifier right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - String lClass = left.getClassid(); - String rClass = right.getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals("OPEN SOURCE")) - return -1; - if (rClass.equals("OPEN SOURCE")) - return 1; - - if (lClass.equals("OPEN")) - return -1; - if (rClass.equals("OPEN")) - return 1; - - if (lClass.equals("6MONTHS")) - return -1; - if (rClass.equals("6MONTHS")) - return 1; - - if (lClass.equals("12MONTHS")) - return -1; - if (rClass.equals("12MONTHS")) - return 1; - - if (lClass.equals("EMBARGO")) - return -1; - if (rClass.equals("EMBARGO")) - return 1; - - if (lClass.equals("RESTRICTED")) - return -1; - if (rClass.equals("RESTRICTED")) - return 1; - - if (lClass.equals("CLOSED")) - return -1; - if (rClass.equals("CLOSED")) - return 1; - - if (lClass.equals("UNKNOWN")) - return -1; - if (rClass.equals("UNKNOWN")) - return 1; - - // Else (but unlikely), lexicographical ordering will do. - return lClass.compareTo(rClass); - } -} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index f16ee260f..36028be9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -3,10 +3,10 @@ package eu.dnetlib.dhp.oa.provision.utils; import java.io.StringReader; import java.io.StringWriter; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Objects; import javax.xml.stream.*; import javax.xml.stream.events.Namespace; @@ -57,13 +57,13 @@ public class StreamingInputDocumentFactory { private static final int MAX_FIELD_LENGTH = 25000; private final ThreadLocal inputFactory = ThreadLocal - .withInitial(() -> XMLInputFactory.newInstance()); + .withInitial(XMLInputFactory::newInstance); private final ThreadLocal outputFactory = ThreadLocal - .withInitial(() -> XMLOutputFactory.newInstance()); + .withInitial(XMLOutputFactory::newInstance); private final ThreadLocal eventFactory = ThreadLocal - .withInitial(() -> XMLEventFactory.newInstance()); + .withInitial(XMLEventFactory::newInstance); private final String version; @@ -126,6 +126,8 @@ public class StreamingInputDocumentFactory { return indexDocument; } catch (XMLStreamException e) { throw new IllegalStateException(e); + } finally { + inputFactory.remove(); } } @@ -143,9 +145,9 @@ public class StreamingInputDocumentFactory { /** * Parse the targetFields block and add fields to the solr document. * - * @param indexDocument - * @param parser - * @throws XMLStreamException + * @param indexDocument the document being populated + * @param parser the XML parser + * @throws XMLStreamException when the parser cannot parse the XML */ protected void parseTargetFields( final SolrInputDocument indexDocument, final XMLEventReader parser) @@ -165,9 +167,10 @@ public class StreamingInputDocumentFactory { final XMLEvent text = parser.nextEvent(); String data = getText(text); - - addField(indexDocument, fieldName, data); - hasFields = true; + if (Objects.nonNull(data)) { + addField(indexDocument, fieldName, data); + hasFields = true; + } } } @@ -192,36 +195,43 @@ public class StreamingInputDocumentFactory { final List nsList, final String dnetResult) throws XMLStreamException { + final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); + final XMLEventFactory xmlEventFactory = this.eventFactory.get(); + try { - for (Namespace ns : nsList) { - eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI()); - } - - StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator()); - - // new root record - writer.add(newRecord); - - // copy the rest as it is - while (parser.hasNext()) { - final XMLEvent resultEvent = parser.nextEvent(); - - // TODO: replace with depth tracking instead of close tag tracking. - if (resultEvent.isEndElement() - && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { - writer.add(eventFactory.get().createEndElement("", null, RESULT)); - break; + for (Namespace ns : nsList) { + xmlEventFactory.createNamespace(ns.getPrefix(), ns.getNamespaceURI()); } - writer.add(resultEvent); + StartElement newRecord = xmlEventFactory.createStartElement("", null, RESULT, null, nsList.iterator()); + + // new root record + writer.add(newRecord); + + // copy the rest as it is + while (parser.hasNext()) { + final XMLEvent resultEvent = parser.nextEvent(); + + // TODO: replace with depth tracking instead of close tag tracking. + if (resultEvent.isEndElement() + && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { + writer.add(xmlEventFactory.createEndElement("", null, RESULT)); + break; + } + + writer.add(resultEvent); + } + writer.close(); + indexDocument.addField(INDEX_RESULT, results.toString()); + } finally { + outputFactory.remove(); + eventFactory.remove(); } - writer.close(); - indexDocument.addField(INDEX_RESULT, results.toString()); } /** - * Helper used to add a field to a solr doc. It avoids to add empy fields + * Helper used to add a field to a solr doc, avoids adding empty fields * * @param indexDocument * @param field @@ -231,7 +241,6 @@ public class StreamingInputDocumentFactory { final SolrInputDocument indexDocument, final String field, final String value) { String cleaned = value.trim(); if (!cleaned.isEmpty()) { - // log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); indexDocument.addField(field.toLowerCase(), cleaned); } } @@ -243,9 +252,9 @@ public class StreamingInputDocumentFactory { * @return the */ protected final String getText(final XMLEvent text) { - if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + - // text.asEndElement().getName().getLocalPart()); + if (text.isEndElement()) { return ""; + } final String data = text.asCharacters().getData(); if (data != null && data.length() > MAX_FIELD_LENGTH) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 173ba326a..7487f0956 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -50,7 +50,7 @@ public class TemplateFactory { public String getChild(final String name, final String id, final List metadata) { return getTemplate(resources.getChild()) .add("name", name) - .add("hasId", !(id == null)) + .add("hasId", id != null) .add("id", id != null ? escapeXml(removePrefix(id)) : "") .add("metadata", metadata) .render(); @@ -103,7 +103,7 @@ public class TemplateFactory { (webresources != null ? webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) - .map(w -> getWebResource(w)) + .map(this::getWebResource) .collect(Collectors.toList())) .render(); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 2c8240290..631335376 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -25,8 +25,8 @@ import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; +import org.xml.sax.SAXException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Lists; @@ -46,6 +46,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { + public static final String DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl"; private final Map accumulators; private final Set specialDatasourceTypes; @@ -56,8 +57,6 @@ public class XmlRecordFactory implements Serializable { private boolean indent = false; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, @@ -86,7 +85,6 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); - // final OafEntity entity = toOafEntity(je.getEntity()); OafEntity entity = je.getEntity(); TemplateFactory templateFactory = new TemplateFactory(); try { @@ -95,8 +93,7 @@ public class XmlRecordFactory implements Serializable { final List metadata = metadata(type, entity, contexts); // rels has to be processed before the contexts because they enrich the contextMap with - // the - // funding info. + // the funding info. final List links = je.getLinks(); final List relations = links .stream() @@ -122,34 +119,11 @@ public class XmlRecordFactory implements Serializable { } } - private static OafEntity parseOaf(final String json, final String type) { - try { - switch (EntityType.valueOf(type)) { - case publication: - return OBJECT_MAPPER.readValue(json, Publication.class); - case dataset: - return OBJECT_MAPPER.readValue(json, Dataset.class); - case otherresearchproduct: - return OBJECT_MAPPER.readValue(json, OtherResearchProduct.class); - case software: - return OBJECT_MAPPER.readValue(json, Software.class); - case datasource: - return OBJECT_MAPPER.readValue(json, Datasource.class); - case organization: - return OBJECT_MAPPER.readValue(json, Organization.class); - case project: - return OBJECT_MAPPER.readValue(json, Project.class); - default: - throw new IllegalArgumentException("invalid type: " + type); - } - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - private String printXML(String xml, boolean indent) { try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document doc = reader.read(new StringReader(xml)); OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); @@ -157,7 +131,7 @@ public class XmlRecordFactory implements Serializable { XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); - } catch (IOException | DocumentException e) { + } catch (IOException | DocumentException | SAXException e) { throw new IllegalArgumentException("Unable to indent XML. Invalid record:\n" + xml, e); } } @@ -203,7 +177,7 @@ public class XmlRecordFactory implements Serializable { final Result r = (Result) entity; if (r.getContext() != null) { - contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); + contexts.addAll(r.getContext().stream().map(Context::getId).collect(Collectors.toList())); /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ if (contexts.contains("dh-ch::subcommunity::2")) { contexts.add("clarin"); @@ -260,14 +234,14 @@ public class XmlRecordFactory implements Serializable { .collect( Collectors .groupingBy( - p -> p.getValue(), + StructuredProperty::getValue, Collectors .mapping( p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) .values() .stream() - .map(op -> op.get()) + .map(Optional::get) .forEach( sp -> { String pidType = getAuthorPidType(sp.getQualifier().getClassid()); @@ -938,7 +912,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .filter(Objects::nonNull) - .map(ft -> ft.getValue()) + .map(Field::getValue) .collect(Collectors.toList())); } @@ -1069,7 +1043,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .peek(ft -> fillContextMap(ft, contexts)) - .map(ft -> getRelFundingTree(ft)) + .map(XmlRecordFactory::getRelFundingTree) .collect(Collectors.toList())); } break; @@ -1112,7 +1086,7 @@ public class XmlRecordFactory implements Serializable { final List links = je.getLinks(); List children = links .stream() - .filter(link -> isDuplicate(link)) + .filter(this::isDuplicate) .map(link -> { final String targetType = link.getTarget().getType(); final String name = ModelSupport.getMainType(EntityType.valueOf(targetType)); @@ -1263,7 +1237,7 @@ public class XmlRecordFactory implements Serializable { return extraInfo != null ? extraInfo .stream() - .map(e -> XmlSerializationUtils.mapExtraInfo(e)) + .map(XmlSerializationUtils::mapExtraInfo) .collect(Collectors.toList()) : Lists.newArrayList(); } @@ -1287,9 +1261,6 @@ public class XmlRecordFactory implements Serializable { if (def == null) { continue; - // throw new IllegalStateException(String.format("cannot find context for id - // '%s'", - // id)); } if (def.getName().equals("context")) { @@ -1327,7 +1298,9 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + factory.setFeature(DISALLOW_DOCTYPE_DECL, true); + Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; } catch (TransformerConfigurationException e) { @@ -1354,8 +1327,10 @@ public class XmlRecordFactory implements Serializable { Document fundingPath; try { - fundingPath = new SAXReader().read(new StringReader(xmlTree)); - } catch (final DocumentException e) { + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + fundingPath = reader.read(new StringReader(xmlTree)); + } catch (final DocumentException | SAXException e) { throw new RuntimeException(e); } try { @@ -1407,7 +1382,9 @@ public class XmlRecordFactory implements Serializable { protected static String getRelFundingTree(final String xmlTree) { String funding = ""; try { - final Document ftree = new SAXReader().read(new StringReader(xmlTree)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document ftree = reader.read(new StringReader(xmlTree)); funding = ""; funding += getFunderElement(ftree); @@ -1427,7 +1404,7 @@ public class XmlRecordFactory implements Serializable { + e.getName() + ">"; } - } catch (final DocumentException e) { + } catch (final DocumentException | SAXException e) { throw new IllegalArgumentException( "unable to parse funding tree: " + xmlTree + "\n" + e.getMessage()); } finally { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 8195467b1..213a62b32 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -11,9 +11,12 @@ public class XmlSerializationUtils { // XML 1.0 // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - private static final String xml10pattern = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + private static final String XML_10_PATTERN = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff" + "]"; + private XmlSerializationUtils() { + } + public static String mapJournal(Journal j) { final String attrs = new StringBuilder() .append(attr("issn", j.getIssnPrinted())) @@ -50,12 +53,12 @@ public class XmlSerializationUtils { public static String escapeXml(final String value) { return value - .replaceAll("&", "&") - .replaceAll("<", "<") - .replaceAll(">", ">") - .replaceAll("\"", """) - .replaceAll("'", "'") - .replaceAll(xml10pattern, ""); + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace("\"", """) + .replace("'", "'") + .replaceAll(XML_10_PATTERN, ""); } public static String parseDataInfo(final DataInfo dataInfo) { @@ -70,18 +73,6 @@ public class XmlSerializationUtils { .toString(); } - private static StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo info) { - return sb - .append( - attr("inferred", info.getInferred() != null ? info.getInferred().toString() : "")) - .append(attr("inferenceprovenance", info.getInferenceprovenance())) - .append( - attr( - "provenanceaction", - info.getProvenanceaction() != null ? info.getProvenanceaction().getClassid() : "")) - .append(attr("trust", info.getTrust())); - } - public static String mapKeyValue(final String name, final KeyValue kv) { return new StringBuilder() .append("<") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java index 6cec3ed53..903150ca8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java @@ -25,7 +25,7 @@ public class ZkServers { // quorum0:2182,quorum1:2182,quorum2:2182,quorum3:2182,quorum4:2182/solr-dev-openaire String urls = zkUrl; final Optional chRoot = Optional.of(SEPARATOR + StringUtils.substringAfterLast(zkUrl, SEPARATOR)); - if (chRoot.isPresent() && StringUtils.isNotBlank(chRoot.get())) { + if (StringUtils.isNotBlank(chRoot.get())) { log.debug(String.format("found zk chroot %s", chRoot)); urls = zkUrl.replace(chRoot.get(), ""); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index ffeb0995d..e5faccd0f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -45,6 +45,7 @@ public class DropAndCreateESIndex { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final String ip = clusterMap.get(cluster).split(",")[0]; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java index 3f842ef34..dd08215d5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java @@ -44,6 +44,7 @@ public class SparkIndexCollectionOnES { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") final Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..9f022454b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,7 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; +import java.util.Objects; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +16,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -35,7 +33,7 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; * * The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities. */ -public class IndexRecordTransformerTest { +class IndexRecordTransformerTest { public static final String VERSION = "2021-04-15T10:05:53Z"; public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; @@ -48,23 +46,23 @@ public class IndexRecordTransformerTest { } @Test - public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + void testPreBuiltRecordTransformation() throws IOException, TransformerException { + String record = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("record.xml"))); testRecordTransformation(record); } @Test - public void testPublicationRecordTransformation() throws IOException, TransformerException { + void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, XmlRecordFactoryTest.otherDsTypeId); Publication p = load("publication.json", Publication.class); Project pj = load("project.json", Project.class); Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -80,8 +78,9 @@ public class IndexRecordTransformerTest { } private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + String fields = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("fields.xml"))); + String xslt = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); @@ -99,7 +98,7 @@ public class IndexRecordTransformerTest { private T load(String fileName, Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); + .readValue(IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(fileName))), clazz); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java index 6818cf6a5..c22a24185 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -62,7 +65,7 @@ public class PrepareRelationsJobTest { } @Test - public void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { + void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { final int maxRelations = 20; PrepareRelationsJob @@ -83,7 +86,7 @@ public class PrepareRelationsJobTest { .as(Encoders.bean(Relation.class)) .cache(); - Assertions.assertEquals(maxRelations, out.count()); + assertEquals(maxRelations, out.count()); Dataset freq = out .toDF() @@ -97,13 +100,13 @@ public class PrepareRelationsJobTest { long participation = getRows(freq, PARTICIPATION).get(0).getAs("count"); long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count"); - Assertions.assertTrue(participation == outcome); - Assertions.assertTrue(outcome > affiliation); - Assertions.assertTrue(participation > affiliation); + assertEquals(outcome, participation); + assertTrue(outcome > affiliation); + assertTrue(participation > affiliation); - Assertions.assertEquals(7, outcome); - Assertions.assertEquals(7, participation); - Assertions.assertEquals(6, affiliation); + assertEquals(7, outcome); + assertEquals(7, participation); + assertEquals(6, affiliation); } protected List getRows(Dataset freq, String col) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index f57b8dcaf..9d5bff3cf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -1,39 +1,42 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class SolrAdminApplicationTest extends SolrTest { +class SolrAdminApplicationTest extends SolrTest { @Test - public void testPing() throws Exception { + void testPing() throws Exception { SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); log.info("pingResponse: '{}'", pingResponse.getStatus()); - Assertions.assertTrue(pingResponse.getStatus() == 0); + assertEquals(0, pingResponse.getStatus()); } @Test - public void testAdminApplication_DELETE() throws Exception { + void testAdminApplication_DELETE() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } @Test - public void testAdminApplication_COMMIT() throws Exception { + void testAdminApplication_COMMIT() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java index 72f28fdf2..dc0a40471 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java @@ -28,7 +28,6 @@ public class SortableRelationKeyTest { .map(r -> SortableRelationKey.create(r, r.getSource())) .sorted() .forEach( - it -> { try { System.out.println(mapper.writeValueAsString(it)); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index d7bcb3185..6f1956578 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -89,7 +89,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_onSolr() throws Exception { + void testXmlIndexingJob_onSolr() throws Exception { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; @@ -112,7 +112,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_saveOnHDFS() throws Exception { + void testXmlIndexingJob_saveOnHDFS() throws Exception { final String ID_XPATH = "//header/*[local-name()='objIdentifier']"; String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 221049f90..d89b3b068 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -13,6 +13,7 @@ import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -21,7 +22,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; -public class XmlRecordFactoryTest { +class XmlRecordFactoryTest { public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -37,11 +37,11 @@ public class XmlRecordFactoryTest { .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test - public void testXMLRecordFactory() throws IOException, DocumentException { + void testXMLRecordFactory() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -72,11 +72,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -104,11 +104,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -135,7 +135,7 @@ public class XmlRecordFactoryTest { } @Test - public void testEnermapsRecord() throws IOException, DocumentException { + void testEnermapsRecord() throws IOException, DocumentException, SAXException { String contextmap = "" + @@ -144,7 +144,7 @@ public class XmlRecordFactoryTest { ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Dataset d = OBJECT_MAPPER diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java index 5b2e6804b..93f1bc087 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java @@ -7,16 +7,8 @@ package eu.dnetlib.oa.graph.usagerawdata.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; -import java.util.Properties; -import org.apache.log4j.Logger; - -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. Zoupanos */ @@ -31,7 +23,9 @@ public abstract class ConnectDB { private static String dbImpalaUrl; private static String usageStatsDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -72,10 +66,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -97,10 +87,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); diff --git a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java index e53709f1a..afd7f9807 100644 --- a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java @@ -7,20 +7,14 @@ package eu.dnetlib.oa.graph.usagestatsbuild.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; -import java.util.Properties; import org.apache.log4j.Logger; -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. Zoupanos */ @@ -37,7 +31,9 @@ public abstract class ConnectDB { private static String usageStatsDBSchema; private static String usagestatsPermanentDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -94,10 +90,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -119,10 +111,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); From 61d811ba531ab79ce314f799fa8471007f7dd03c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 12:18:20 +0200 Subject: [PATCH 160/287] suggestions from intellij --- .../eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 2 +- .../dhp/oa/graph/raw/PatchRelationApplicationTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index 6c309acb3..cc36421a0 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; public class EXCELParserTest { private static Path workingDir; - private HttpConnector2 httpConnector = new HttpConnector2(); + private final HttpConnector2 httpConnector = new HttpConnector2(); private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; @BeforeAll diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index c8158c044..c9c32edd9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -81,8 +81,8 @@ public class PatchRelationApplicationTest { PatchRelationsApplication.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphBasePath", graphBasePath, - "-workingDir", workingDir.toString() + "/workingDir", - "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + "-workingDir", workingDir + "/workingDir", + "-idMappingPath", workingDir + "/" + ID_MAPPING_PATH }); final List rels = spark From 9f4db73f30981aafb57440a3332c4095ef30f8d9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 15:02:51 +0200 Subject: [PATCH 161/287] updated/fixed unit tests --- .../collection/plugin/rest/RestIterator.java | 1 - .../oa/dedup/DispatchEntitiesSparkJob.java | 1 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 6 ++-- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 29 +++++++++++-------- .../doiboost/orcid/OrcidClientTest.java | 1 + .../orcidnodoi/PublicationToOafTest.java | 4 +-- .../graph/dump/community/CommunitySplit.java | 28 ++++++++++-------- .../oa/graph/dump/SplitForCommunityTest.java | 2 +- .../CreateRelatedEntitiesJob_phase1.java | 7 +++-- .../oa/provision/utils/XmlRecordFactory.java | 1 - 10 files changed, 45 insertions(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 67ec22b46..a90d259b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -132,7 +132,6 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { final TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 2cdc22153..ea738836b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -57,6 +57,7 @@ public class DispatchEntitiesSparkJob { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 1860a3ff0..2f992bd78 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -195,7 +195,7 @@ public class SparkDedupTest implements Serializable { assertEquals(3082, orgs_simrel); assertEquals(7036, pubs_simrel); - assertEquals(344, sw_simrel); + assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); assertEquals(6750, orp_simrel); } @@ -346,7 +346,7 @@ public class SparkDedupTest implements Serializable { assertEquals(1272, orgs_mergerel); assertEquals(1438, pubs_mergerel); - assertEquals(288, sw_mergerel); + assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); assertEquals(718, orp_mergerel); @@ -535,7 +535,7 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4862, relations); + assertEquals(4860, relations); // check deletedbyinference final Dataset mergeRels = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 2efbe260a..1ba2c717c 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -41,7 +41,7 @@ public class SparkStatsTest implements Serializable { private static final String testActionSetId = "test-orchestrator"; @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { + public static void beforeAll() throws IOException, URISyntaxException { testGraphBasePath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) @@ -73,7 +73,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); @@ -82,7 +82,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); @@ -91,7 +91,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); @@ -100,7 +100,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); @@ -109,7 +109,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); @@ -118,7 +118,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); } @@ -129,7 +129,7 @@ public class SparkStatsTest implements Serializable { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/createBlockStats_parameters.json"))); parser @@ -168,10 +168,15 @@ public class SparkStatsTest implements Serializable { .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(549, orgs_blocks); - assertEquals(299, pubs_blocks); + assertEquals(477, orgs_blocks); + assertEquals(295, pubs_blocks); assertEquals(122, sw_blocks); - assertEquals(186, ds_blocks); - assertEquals(170, orp_blocks); + assertEquals(191, ds_blocks); + assertEquals(171, orp_blocks); + } + + @AfterAll + public static void tearDown() { + spark.close(); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 82577e0d8..9ea7c6959 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -160,6 +160,7 @@ public class OrcidClientTest { } @Test + @Disabled void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 1dbb37fca..54c16b5d7 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static org.junit.jupiter.api.Assertions.*; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,14 +15,13 @@ import com.google.gson.JsonParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import jdk.nashorn.internal.ir.annotations.Ignore; class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test - @Ignore + @Disabled void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index a9c0770a8..b92eb3e60 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -48,29 +49,32 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities - .stream() .forEach(c -> printResult(c, result, outputPath)); } - private static void printResult(String c, Dataset result, String outputPath) { - Dataset community_products = result - .filter((FilterFunction) r -> containsCommunity(r, c)); + private static void printResult(String community, Dataset result, String outputPath) { + Dataset communityProducts = result + .filter((FilterFunction) r -> containsCommunity(r, community)); - community_products.first(); - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); + try { + communityProducts.first(); + communityProducts + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + community); + } catch (NoSuchElementException e) { + // ignoring it on purpose + } } - private static boolean containsCommunity(CommunityResult r, String c) { + private static boolean containsCommunity(CommunityResult r, String community) { if (Optional.ofNullable(r.getContext()).isPresent()) { return !r .getContext() .stream() - .filter(con -> con.getCode().equals(c)) + .filter(con -> con.getCode().equals(community)) .collect(Collectors.toList()) .isEmpty(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 2ea4bc91a..e6f0e9106 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - void test1() { + void testCommunitySplit() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 3301577ee..ed33ff6b6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -72,6 +72,7 @@ public class CreateRelatedEntitiesJob_phase1 { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); @@ -223,10 +224,10 @@ public class CreateRelatedEntitiesJob_phase1 { /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * file * - * @param spark - * @param relationPath + * @param spark the SparkSession + * @param relationPath the path storing the relation objects * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 631335376..adc8aca9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1299,7 +1299,6 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { final TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature(DISALLOW_DOCTYPE_DECL, true); Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; From 52c18c2697b17e17916284b7b2f028ad28df7bcf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:16:55 +0200 Subject: [PATCH 162/287] removed not needed test class. Teh functionality has been moved --- .../eu/dnetlib/dhp/doiboost/GetCSVTest.java | 43 ------------------- 1 file changed, 43 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java deleted file mode 100644 index 6cfc90736..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.doiboost; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -public class GetCSVTest { - - @Test - public void readUnibiGoldTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ','); - - Assertions.assertEquals(72, pl.size()); - -// ObjectMapper OBJECT_MAPPER = new ObjectMapper(); -// -// pl.forEach(res -> { -// try { -// System.out.println(OBJECT_MAPPER.writeValueAsString(res)); -// } catch (JsonProcessingException e) { -// e.printStackTrace(); -// } -// }); - - } -} From b1c6140ebf01608af7792d4d528c83ea62760616 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:23:33 +0200 Subject: [PATCH 163/287] removed all comments in Italian --- .../hostedbymap/SparkApplyHostedByMapToDatasource.scala | 2 -- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 5 ----- .../hostedbymap/SparkPrepareHostedByInfoToApply.scala | 9 +++++---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index fad313f1c..ae1454b47 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -61,8 +61,6 @@ object SparkApplyHostedByMapToDatasource { val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - //c. dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 4c3b98f3b..533e439b7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -14,9 +14,6 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance -// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - object SparkApplyHostedByMapToResult { @@ -76,8 +73,6 @@ object SparkApplyHostedByMapToResult { val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance - // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index d342d57b0..6db0ad33d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -106,14 +106,15 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere la hostedbymap e trasformarla in entity info + //STEP1: read the hostedbymap and transform it in EntityInfo val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") - //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + //STEP3: left join resultInfo with hostedByInfo on journal_id. Reduction of all the results with the same id in just + //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) + //to this entry we add the id of the datasource for the next step joinResHBM(resultInfoDataset, hostedByInfo) .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) From 822963283954830d16429411e983529de815b179 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:36:01 +0200 Subject: [PATCH 164/287] adding assertions to the mapping of the unibi part of gold list --- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index f886b275b..98b811bb6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -7,6 +7,8 @@ import java.net.URLConnection; import java.nio.charset.Charset; import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -29,18 +31,13 @@ public class TestReadCSV { .build() .parse(); - ObjectMapper mapper = new ObjectMapper(); - - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + Assertions.assertEquals(36, beans.size()); + Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); + Assertions.assertTrue(beans.stream().anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); + Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); } - + @Disabled @Test public void testCSVUrlUnibi() throws IOException { @@ -66,6 +63,7 @@ public class TestReadCSV { ); } + @Disabled @Test public void testCSVUrlDOAJ() throws IOException { From f9b6b45d85e3ea9e676062366a2e249de1f7bd03 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:04:48 +0200 Subject: [PATCH 165/287] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bc..93cc00eca 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 55fc500d8d0d6736ca10e06c01e83c784299e93f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:17:48 +0200 Subject: [PATCH 166/287] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml index b421f1342..06325810b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml @@ -31,7 +31,7 @@ https://pub.uni-bielefeld.de/record/1997560.json - 0016-9056 + 0016-9056 ger Friedrich From 7aa3260729a7a145842958526d3feedfa3461e8c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:18:45 +0200 Subject: [PATCH 167/287] reverting --- .../original/dc_cleaning_OPENAIREplus_compliant_hal_orig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index 505f56294..d4eb71dc4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -134,9 +134,9 @@ oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-orig oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; // journal data -// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the issn (as e.g. print/online/...) -$varISSN = xpath:"//dc:source[starts-with(., 'issn:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'issn:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'issn:')), 1, 4))))"; -//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'issn:'))"; +// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...) +$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))"; +//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))"; $varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))"; oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";); From 524c06e0283244758df98add2d622db44dd7f16b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:20:30 +0200 Subject: [PATCH 168/287] reverting --- ...e_datacite_ExchangeLandingpagePid_orig.xsl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl index f80c91a6a..3cfaec80b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl @@ -169,7 +169,7 @@ @@ -283,7 +283,7 @@ - + - + - + @@ -793,9 +793,9 @@ - + - + @@ -844,7 +844,7 @@ - + - + - + @@ -594,9 +594,9 @@ - + - + From 804589eb30f616314c9d9e01c0681312548e2777 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:23:35 +0200 Subject: [PATCH 170/287] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bc..93cc00eca 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From b6b58bba281176ea534c75cee5a440b99ac4908c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:25:37 +0200 Subject: [PATCH 171/287] reverting --- .../eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv index 0c1fd9e64..6a5fc3f87 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv @@ -36,10 +36,10 @@ article-number,String,No,,N/A, published-print,Partial Date,No,Date on which the work was published in print,"Result/relevantDate with Qualifier(""published-print"", ""dnet:dataCite_date"")", published-online,Partial Date,No,Date on which the work was published online,"Result/relevantDate with Qualifier(""published-online"", ""dnet:dataCite_date"")", subject,Array of String,No,"Subject category names, a controlled vocabulary from Sci-Val. Available for most journal articles","Result/subject with Qualifier(""keywords"", ""dnet:subject_classification_typologies""). ","Future improvements: map the controlled vocabulary instead of using the generic ""keywords"" qualifier" -issn,Array of String,No,,"Publication/Journal/issn +ISSN,Array of String,No,,"Publication/Journal/issn Publication/Journal/lissn Publication/Journal/eissn",The mapping depends on the value of issn-type -issn-type,Array of issn with Type,No,List of ISSNs with issn type information,N/A,Its value guides the setting of the properties in Journal (see row above) +issn-type,Array of ISSN with Type,No,List of ISSNs with ISSN type information,N/A,Its value guides the setting of the properties in Journal (see row above) ISBN,Array of String,No,,Publication/source,"In case of Book We can map ISBN and container title on Publication/source using this syntax container-title + ""ISBN: "" + ISBN" archive,Array of String,No,,N/A, license,Array of License,No,,Result/Instance/License, From c6a2a780a9b181395ae455a661ff620ecbcb6f22 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:30:17 +0200 Subject: [PATCH 172/287] reverting --- .../src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 4db5fd463..ba47aaf5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 0e1a6bec20edf4995f9ccb9825907f3a545f3817 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:32:29 +0200 Subject: [PATCH 173/287] reverting --- .../resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml index b45fa1edb..95457fb70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml @@ -16,7 +16,7 @@ Doğuş Üniversitesi, Fen Edebiyat Fakültesi, Fizik Bölümü TR3959 urn:issn:1748-0221 - VOLUME=7;ISSUE=1;issn=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSUE=1;ISSN=1748-0221;TITLE=Journal of Instrumentation ATLAS Collaboration Mitsou, Vasiliki Fiorini, Luca Ros Martínez, Eduardo Castillo Giménez, María Victoria Fuster Verdú, Juan A. García García, Carmen Cabrera Urbán, Susana Martí García, Salvador Salt Cairols, José Lacasta Llácer, Carlos Valls Ferrer, @@ -30,7 +30,7 @@ interactions. Journal of Instrumentation, 7(1). doi: 10.1088/1748-0221/7/01/P01013. UC Santa Cruz: Retrieved from: http://www.escholarship.org/uc/item/05j2j2br Journal of Instrumentation, 7 - VOLUME=7;issn=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSN=1748-0221;TITLE=Journal of Instrumentation 1748-0221 Journal of Instrumentation 7, P01013 (2012). doi:10.1088/1748-0221/7/01/P01013 From 8ad7c7141702b3a1638d7d1abe26889f300ea480 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:36:12 +0200 Subject: [PATCH 174/287] reverting --- .../dhp/sx/graph/bio/pubmed/pubmed.xml | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml index 06ebe97cd..22da07e29 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml @@ -16,7 +16,7 @@
- 0006-2944 + 0006-2944 13 2 @@ -182,7 +182,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -314,7 +314,7 @@
- 0006-291X + 0006-291X 66 4 @@ -460,7 +460,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -644,7 +644,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -774,7 +774,7 @@
- 0006-291X + 0006-291X 66 4 @@ -934,7 +934,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -1614,7 +1614,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -2017,7 +2017,7 @@
- 0006-2952 + 0006-2952 24 17 @@ -2185,7 +2185,7 @@
- 1873-2968 + 1873-2968 24 17 @@ -2337,7 +2337,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2585,7 +2585,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2838,7 +2838,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3020,7 +3020,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3204,7 +3204,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3450,7 +3450,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3683,7 +3683,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3880,7 +3880,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4069,7 +4069,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4428,7 +4428,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4590,7 +4590,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4741,7 +4741,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4999,7 +4999,7 @@
- 0004-4172 + 0004-4172 25 9 From b9663298330212499fef14413b35ced9d336dc57 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:37:00 +0200 Subject: [PATCH 175/287] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bc..93cc00eca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From cc3d72df0e9dd8781e0ae8ae59b57032bca4098e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:42:01 +0200 Subject: [PATCH 176/287] removing not needed dependency --- dhp-common/pom.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index a6a57ce7b..c66c1d3bc 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -118,11 +118,7 @@ dhp-schemas - - org.apache.commons - commons-csv - 1.8 - + From 95e5482bbb35acc1ae269fb018017ec3f8240583 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:42:26 +0200 Subject: [PATCH 177/287] removing not needed dependency --- dhp-workflows/dhp-doiboost/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index ea8832754..385f87ad2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -82,12 +82,6 @@ org.apache.commons commons-text - - eu.dnetlib.dhp - dhp-aggregation - 1.2.4-SNAPSHOT - compile - From 785db1d5b2c26cf3e81bc9a38b54ec60c26764f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:44:07 +0200 Subject: [PATCH 178/287] refactoring --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 98b811bb6..89259d814 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -33,10 +33,15 @@ public class TestReadCSV { Assertions.assertEquals(36, beans.size()); Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); - Assertions.assertTrue(beans.stream().anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); + Assertions + .assertTrue( + beans + .stream() + .anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); } + @Disabled @Test public void testCSVUrlUnibi() throws IOException { From 9650eea497b3d131fab34f37afab12c872a26afc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:45:48 +0200 Subject: [PATCH 179/287] reverting --- dhp-common/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c66c1d3bc..4c7810c47 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,12 +117,6 @@ eu.dnetlib.dhp dhp-schemas - - - - - - From e33daaeee89b253cde8968a8c33cad4b346e9aae Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:46:19 +0200 Subject: [PATCH 180/287] reverting --- dhp-workflows/dhp-doiboost/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 385f87ad2..f496ea9a2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -84,6 +84,7 @@ + From ac417ca798b07e77765d12ffe8c3c3774b1bc111 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:50:33 +0200 Subject: [PATCH 181/287] removed not needed test resource --- .../dhp/doiboost/issn_gold_oa_version_4.csv | 73 ------------------- 1 file changed, 73 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv deleted file mode 100644 index ebde1bf18..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv +++ /dev/null @@ -1,73 +0,0 @@ -"issn","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" -"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" -"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" -"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" -"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" -"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" -"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" -"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" -"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" -"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" -"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" -"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" -"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" -"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" -"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" -"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" -"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" -"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" -"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" -"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" -"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" -"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" -"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" -"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" -"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" -"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" -"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" -"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" -"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" -"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" -"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" -"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" -"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" -"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" -"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" -"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" -"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" -"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ" -"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD" -"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ" -"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ" -"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva Teológica","DOAJ" -"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ" -"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ" -"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ" -"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ" -"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ" -"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD" -"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ" -"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD" -"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ" -"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ" -"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ" -"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ" -"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ" -"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD" -"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ" -"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD" -"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. Electrical engineering","ROAD" -"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ" -"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD" -"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ" -"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ" -"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ" -"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ" -"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD" -"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD" -"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ" -"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ" -"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD" -"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ" -"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ" -"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ" From 08dd2b2102c3a8074c854839cc4b18f948d24b95 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 18:09:41 +0200 Subject: [PATCH 182/287] moving the dependency version to the external pom file --- dhp-workflows/dhp-graph-mapper/pom.xml | 1 - pom.xml | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 665c01276..6c5e4609a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -125,7 +125,6 @@ com.opencsv opencsv - 5.5 diff --git a/pom.xml b/pom.xml index 433c88093..d31ffb88d 100644 --- a/pom.xml +++ b/pom.xml @@ -519,6 +519,12 @@ ${common.text.version} + + com.opencsv + opencsv + 5.5 + + From 8cdce59e0ebd826efd1b8845011ce3f2bf494e71 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:26 +0200 Subject: [PATCH 183/287] [graph raw] let the mapping exceptions propagate --- .../raw/AbstractMdRecordToOafMapper.java | 82 +++++++------------ .../raw/GenerateEntitiesApplication.java | 3 +- .../raw/GenerateEntitiesApplicationTest.java | 6 +- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 0 4 files changed, 36 insertions(+), 55 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 9aa4e4c31..526f45f6e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -28,10 +28,7 @@ import java.util.Optional; import java.util.Set; import org.apache.commons.lang3.StringUtils; -import org.dom4j.Document; -import org.dom4j.DocumentFactory; -import org.dom4j.DocumentHelper; -import org.dom4j.Node; +import org.dom4j.*; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper { this.forceOriginalId = false; } - public List processMdRecord(final String xml) { - try { - DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); + public List processMdRecord(final String xml) throws DocumentException { - final Document doc = DocumentHelper - .parseText( - xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final Document doc = DocumentHelper + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - if (collectedFrom == null) { - return Lists.newArrayList(); - } + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - - if (hostedBy == null) { - return Lists.newArrayList(); - } - - final DataInfo info = prepareDataInfo(doc, invisible); - final long lastUpdateTimestamp = new Date().getTime(); - - final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); - - final String type = getResultType(doc, instances); - - return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - } catch (final Exception e) { - throw new RuntimeException(e); + if (collectedFrom == null) { + return Lists.newArrayList(); } + + final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) + ? collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + + if (hostedBy == null) { + return Lists.newArrayList(); + } + + final DataInfo info = prepareDataInfo(doc, invisible); + final long lastUpdateTimestamp = new Date().getTime(); + + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + + final String type = getResultType(doc, instances); + + return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } protected String getResultType(final Document doc, final List instances) { @@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper { return vocs.getTermAsQualifier(schemeId, classId); } - protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final DataInfo info) { - final List res = new ArrayList<>(); - - for (final Object o : node.selectNodes(xpath)) { - final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId).trim(); - res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); - } - return res; - } - protected List prepareListStructPropsWithValidQualifier( final Node node, final String xpath, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 9027b49d7..6bb18c375 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -17,6 +17,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,7 +159,7 @@ public class GenerateEntitiesApplication { final String id, final String s, final boolean shouldHashId, - final VocabularyGroup vocs) { + final VocabularyGroup vocs) throws DocumentException { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 1e974dd69..67490a470 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.List; import org.apache.commons.io.IOUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest { } @Test - void testMergeResult() throws IOException { + void testMergeResult() throws IOException, DocumentException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); @@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest { assertEquals(resultType, merge.getResulttype().getClassid()); } - protected Result getResult(String xmlFileName, Class clazz) throws IOException { + protected Result getResult(String xmlFileName, Class clazz) + throws IOException, DocumentException { final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); return new OdfToOafMapper(vocs, false, true) .processMdRecord(xml) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml new file mode 100644 index 000000000..e69de29bb From 86d940044c067f71ac0bc451885ecd329f117cc6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:56 +0200 Subject: [PATCH 184/287] added test to verify bad records from FWF-E-Book-Library --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 43 ++++++++----- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 61 +++++++++++++++++++ 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 000dbfe25..5228d1d02 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -13,6 +13,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -48,7 +49,7 @@ class MappersTest { } @Test - void testPublication() throws IOException { + void testPublication() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -145,7 +146,7 @@ class MappersTest { } @Test - void testPublication_PubMed() throws IOException { + void testPublication_PubMed() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); @@ -217,7 +218,7 @@ class MappersTest { } @Test - void testPublicationInvisible() throws IOException { + void testPublicationInvisible() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -233,7 +234,17 @@ class MappersTest { } @Test - void testDataset() throws IOException { + void testOdfFwfEBookLibrary() throws IOException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_fwfebooklibrary.xml"))); + + assertThrows( + IllegalArgumentException.class, + () -> new OdfToOafMapper(vocs, false, true).processMdRecord(xml)); + } + + @Test + void testDataset() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -346,7 +357,7 @@ class MappersTest { } @Test - void testOdfBielefeld() throws IOException { + void testOdfBielefeld() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); @@ -394,7 +405,7 @@ class MappersTest { } @Test - void testOpentrial() throws IOException { + void testOpentrial() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); @@ -511,7 +522,7 @@ class MappersTest { } @Test - void testSoftware() throws IOException { + void testSoftware() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -530,7 +541,7 @@ class MappersTest { } @Test - void testClaimDedup() throws IOException { + void testClaimDedup() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -544,7 +555,7 @@ class MappersTest { } @Test - void testNakala() throws IOException { + void testNakala() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -572,7 +583,7 @@ class MappersTest { } @Test - void testEnermaps() throws IOException { + void testEnermaps() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -597,7 +608,7 @@ class MappersTest { } @Test - void testClaimFromCrossref() throws IOException { + void testClaimFromCrossref() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -614,7 +625,7 @@ class MappersTest { } @Test - void testODFRecord() throws IOException { + void testODFRecord() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -628,7 +639,7 @@ class MappersTest { } @Test - void testTextGrid() throws IOException { + void testTextGrid() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -662,7 +673,7 @@ class MappersTest { } @Test - void testBologna() throws IOException { + void testBologna() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -679,7 +690,7 @@ class MappersTest { } @Test - void testJairo() throws IOException { + void testJairo() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -703,7 +714,7 @@ class MappersTest { } @Test - void testOdfFromHdfs() throws IOException { + void testOdfFromHdfs() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml index e69de29bb..cead018d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml @@ -0,0 +1,61 @@ + + +
+ oai:e-book.fwf.ac.at:o:116 + 2020-04-08T13:25:21.742Z + text + 2021-08-12T00:16:55.365Z +
+ + + + 978-3-205-77704-5 + https://e-book.fwf.ac.at/o:116 + + + Although small and not particularly peoples both Chechens and Palestinians became famous for suicide bomber attacks in recent years. This can - partly - be explained by the unrecognised collective traumas of the past. Both Chechens and Palestinians experienced collective traumas in the 1940ties. The entire Chechen population wad deported by Josef Stalin to Kasakhstan, Kirgysia and Sibiria in February 1944 under the pretext of collaboration with the Third Reich. Those who survived were allowed to return in 1957 to Chechenya. Half of the Palestinian Arab population was expelled from Palestine in 1947/48, when fighting erupted between Jews and Arabs. The refugees were never allowed to return. The memory of the deportation/expulsion was kept alive. The founding traumas contributed to the development of Chechen and Palestinian nationalism. Chechens and Palestinians till today suffer from their collective traumas, which stayed unrecognised and therefore create psychological and political problems for the following generations - and for their adverseries. The phenomenon of the "closed circle of violence" created a phobic collective behaviour, which led for example Chechens to the illusionary declaration of independence in 1991. It also led to the individual overreaction of young Chechens or Palestinians, who became living bombs. The collective Trauma, if untreated, poses a threat to any peaceful political solution. + 1. Einleitung Ausgehend von der Fragestellung, warum gerade bei Tschetschenen und Palästinensern der Selbstmordterrorismus in den letzten Jahren so populär geworden ist, analysiert die Autorin die Geschichte dieser beiden Völker. Einer der Gründe ist bisher wenig beachtet worden. Der Einfluss eines kollektiven Traumas, das als solches nicht anerkannt, behandelt und auch nicht einer politischen Lösung zugeführt wurde. 2. Geschichte der Palästinenser und Tschetschenen Im Zuge der Errichtung Israels im Unabhängigkeitskrieg 1948 verlor die Hälfte des palästinensischen Volkes - 750.000 Menschen - ihre Heimat. Unter der Führung von Jassir Arafat kämpften sie in den Jahrzehnten danach - mit Gewalt und am Verhandlungstisch - um einen eigenen Staat. Das Recht auf Rückkehr spielte dabei immer eine besondere Rolle. Die "Nakbah", die als Katastrophe empfundene Vertreibung 1948, wurde dabei Bezugs- und Angelpunkt mehrer Generationen von Flüchtlingen. Die Weigerung Israels, die Mitverantwortung für die Vertreibung der Palästinenser zu übernehmen und das kollektive Trauma der Palästinenser anzuerkennen - aus Angst vor einer Infragestellung des eigenen Staates - ist einer der Gründe, warum der Nahostkonflikt bisher nicht gelöst werden konnte. Auch die Tschetschenen durften jahrzehntelang über die Deportation ihres Volkes nicht einmal sprechen. Hatte Josef Stallin sie erst unter dem Vorwand der Kollaboration mit Nazi-Deutschland deportiert, waren sie zwar nach seinem Tod in die Heimat zurückgekehrt, lebten dort aber jahrzehntelang weiterhin als "unzuverlässiges Volk". Das kollektive Trauma der Deportation konnte nur mündlich überliefert werden. Mit dem Zusammenbruch der Sowjetunion brach der ungelöste Konflikt zwischen Tschetschenien und Russland sofort auf, das Land ging in blutigen Kriegen unter. 3. Zusammenfassung Die kollektive Erinnerung ist in den vergangenen Jahrzehnten zu einem zentralen Forschungsthema geworden. Der vorsichtige Einsatz von in der Individualpsychologie gewonnenen Erkenntnissen in der Behandlung von kollektiven Traumata, um zu einer politischen Lösung zu kommen, ist eine Chance. Das Studium historischer Fakten in Kombination mit den Erkenntnissen der Psychologie und Psychiatrie bietet die Basis für eine politische Lösung. Die vorliegende Arbeit zeigt, dass kollektive Traumata, die nicht behandelt werden, immer wieder, auch Generationen später, zu kollektiven Reaktionen führen können, die auf den ersten Blick irrational erscheinen. Die vielleicht radikalste Form des politischen Widerstandes, das Selbstmordattentat, ist dafür ein Beispiel. + deu/ger + Böhlau + application/pdf + + Trauma und Terror: Zum palästinensischen und tschetschenischen Nationalismus + + + + Szyszkowitz, Tessa + Tessa + Szyszkowitz + + + + Trauma, Terror, Palestinians, Suicide attacks, Recognition, Chechens + ÖFOS 2002, Contemporary history + BIC Standard Subject Categories, Postwar 20th century history, from c 1945 to c 2000 (HBLW3) + ÖFOS 2002, Zeitgeschichte + + + 14.02 MB + + + 2007 + + + literature + 2007-01-01 + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + deu/ger + fwf_________::D 3929 + + + https://fedora.e-book.fwf.ac.at/fedora/get/o:116/bdef:Content/download + +
\ No newline at end of file From 6e84b3951f685061cc61e71c124cf4a7518ccee2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:57:41 +0200 Subject: [PATCH 185/287] GetCSV refactoring - moving classes to dhp-common that have dependency with GetCSV class (that was located in graph-mapper) --- .../common/aggregation}/AggregatorReport.java | 2 +- .../collection/CollectorException.java | 2 +- .../dnetlib/dhp/common/collection/GetCSV.java | 65 +++++++++++++++++++ .../common}/collection/HttpClientParams.java | 2 +- .../common}/collection/HttpConnector2.java | 4 +- 5 files changed, 70 insertions(+), 5 deletions(-) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common => dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation}/AggregatorReport.java (96%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/CollectorException.java (93%) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpClientParams.java (97%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpConnector2.java (98%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java index 8e46ab92b..c5926848e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.aggregation.common; +package eu.dnetlib.dhp.common.aggregation; import java.io.Closeable; import java.io.IOException; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java index 144d297e6..5d94c2f89 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; public class CollectorException extends Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java new file mode 100644 index 000000000..44f4121eb --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, ','); + + } + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass, char delimiter) throws IOException, ClassNotFoundException { + + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))){ + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index ab0d5cc02..6fcec00dd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; /** * Bundles the http connection parameters driving the client behaviour. diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 8493a3436..724f5f0e1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; import static eu.dnetlib.dhp.utils.DHPUtils.*; @@ -15,7 +15,7 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java From bfe8f5335c733177b338b54f1e63936c62b362cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:58:14 +0200 Subject: [PATCH 186/287] GetCSV refactoring - copied model classes in test path --- .../collection/models/CSVProgramme.java | 80 +++++++++++++++++ .../common/collection/models/CSVProject.java | 90 +++++++++++++++++++ .../common/collection/models/DOAJModel.java | 52 +++++++++++ .../collection/models/UnibiGoldModel.java | 45 ++++++++++ 4 files changed, 267 insertions(+) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java new file mode 100644 index 000000000..d17fcc75e --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java @@ -0,0 +1,80 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; +import com.opencsv.bean.CsvIgnore; + +/** + * The model for the programme csv file + */ +public class CSVProgramme implements Serializable { + + @CsvBindByName(column = "code") + private String code; + + @CsvBindByName(column = "title") + private String title; + + @CsvBindByName(column = "shortTitle") + private String shortTitle; + + @CsvBindByName(column = "language") + private String language; + + @CsvIgnore + private String classification; + + @CsvIgnore + private String classification_short; + + public String getClassification_short() { + return classification_short; + } + + public void setClassification_short(String classification_short) { + this.classification_short = classification_short; + } + + public String getClassification() { + return classification; + } + + public void setClassification(String classification) { + this.classification = classification; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getShortTitle() { + return shortTitle; + } + + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java new file mode 100644 index 000000000..19606a09e --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -0,0 +1,90 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "status") + private String status; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + @CsvBindByName(column = "title") + private String title; + + @CsvBindByName(column = "call") + private String call; + + @CsvBindByName(column = "subjects") + private String subjects; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getCall() { + return call; + } + + public void setCall(String call) { + this.call = call; + } + + public String getSubjects() { + return subjects; + } + + public void setSubjects(String subjects) { + this.subjects = subjects; + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java new file mode 100644 index 000000000..156ba3632 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java new file mode 100644 index 000000000..ae47262bf --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} From 733bcaecf621657dfbaf1aec886b3071216dd924 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:58:52 +0200 Subject: [PATCH 187/287] GetCSV refactoring - added test class (all the tests are disabled since they refer to remote resource) --- .../dhp/common/collection/GetCSVTest.java | 245 ++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java new file mode 100644 index 000000000..bf5e3dedb --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -0,0 +1,245 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.nio.file.Files; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.common.collection.models.CSVProgramme; +import eu.dnetlib.dhp.common.collection.models.CSVProject; +import eu.dnetlib.dhp.common.collection.models.DOAJModel; +import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; + +public class GetCSVTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(GetCSVTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + Assertions + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + Assertions + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + Assertions.assertTrue(csvp.getShortTitle().equals("")); + Assertions.assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + Assertions + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + Assertions + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + Assertions.assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + Assertions + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + Assertions + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + Assertions.assertTrue(csvp.getLanguage().equals("en")); + } + Assertions.assertTrue(csvp.getCode() != null); + Assertions.assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + // String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) + // new BufferedReader(new FileReader(fileURL)) + , workingDir + "/projects", + "eu.dnetlib.dhp.common.collection.models.CSVProject", ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + Assertions.assertTrue(csvp.getId().equals("771736")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + Assertions.assertTrue(csvp.getId().equals("752903")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + Assertions.assertTrue(csvp.getId().equals("861952")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + Assertions.assertTrue(csvp.getId() != null); + Assertions.assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } + + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ','); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + Assertions.assertTrue(unibi.getIssn().equals("0001-625X")); + Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X")); + Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + Assertions.assertTrue(unibi.getIssn().equals("2088-6330")); + Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330")); + Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + Assertions.assertTrue(unibi.getIssn().equals("2658-7068")); + Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488")); + Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + Assertions.assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + GetCSV + .getCsv( + fs, new BufferedReader( + new FileReader("/tmp/DOAJ_1.csv")), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.DOAJModel", ','); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + Assertions.assertTrue(doaj.getIssn().equals("0001-3765")); + Assertions.assertTrue(doaj.getEissn().equals("1678-2690")); + Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências")); + + } + if (count == 7902) { + + Assertions.assertTrue(doaj.getIssn().equals("")); + Assertions.assertTrue(doaj.getEissn().equals("2055-7159")); + Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports")); + } + if (count == 16703) { + + Assertions.assertTrue(doaj.getIssn().equals("")); + Assertions.assertTrue(doaj.getEissn().equals("2788-6298")); + Assertions + .assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa")); + } + + count += 1; + } + + Assertions.assertEquals(16709, count); + } + +} From 7402daf51a34df27fa303c53143ed61ed5c48e3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:59:19 +0200 Subject: [PATCH 188/287] GetCSV refactoring - added dependency to open-csv lib --- dhp-common/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 4c7810c47..c057123b1 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,6 +117,11 @@ eu.dnetlib.dhp dhp-schemas + + + com.opencsv + opencsv + From d36e925277b71168a0dcb440a2f5e4d1624d5359 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:00:21 +0200 Subject: [PATCH 189/287] GetCSV refactoring - moved under model package --- .../dhp/actionmanager/project/utils/{ => model}/EXCELTopic.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/{ => model}/EXCELTopic.java (97%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java index 5607df118..fa2e3422e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; From b62cd656a78b763c0db77dd9f625bb2cfab99937 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:01:10 +0200 Subject: [PATCH 190/287] GetCSV refactoring - changed the model to store only the information needed --- .../utils/{ => model}/CSVProgramme.java | 24 +++++---- .../project/utils/model/CSVProject.java | 51 +++++++++++++++++++ 2 files changed, 64 insertions(+), 11 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/{ => model}/CSVProgramme.java (77%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java similarity index 77% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index d486f0104..ea89a75eb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -1,20 +1,31 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; +import com.opencsv.bean.CsvBindByName; + /** * The model for the programme csv file */ public class CSVProgramme implements Serializable { - private String rcn; + @CsvBindByName(column = "code") private String code; + @CsvBindByName(column = "title") private String title; + + @CsvBindByName(column = "shortTitle") private String shortTitle; + + @CsvBindByName(column = "language") private String language; + + @CsvBindByName(column = "classification") private String classification; + + @CsvBindByName(column = "classification_short") private String classification_short; public String getClassification_short() { @@ -33,14 +44,6 @@ public class CSVProgramme implements Serializable { this.classification = classification; } - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - public String getCode() { return code; } @@ -73,5 +76,4 @@ public class CSVProgramme implements Serializable { this.language = language; } -// } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java new file mode 100644 index 000000000..73cea0539 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.actionmanager.project.utils.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + + +} From 4317211a2b5e64ad9e4a969642ab674182854593 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:03:14 +0200 Subject: [PATCH 191/287] GetCSV refactoring - refactoring due to movement --- .../dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 ++ .../eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5ce730692..95c6f987a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; + /** * Reads a generic excel file and maps it into classes that mirror its schema */ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 359e46fc7..9e73cbc37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -16,8 +16,8 @@ import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs From e9fc3ef3bc4f83ab6e0e93188a5782cefdea72d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:03:41 +0200 Subject: [PATCH 192/287] GetCSV refactoring - changed to use the new class to get and write the csv file --- .../actionmanager/project/utils/ReadCSV.java | 72 +++---------------- 1 file changed, 9 insertions(+), 63 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index 1ae775bec..c967d4cae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -1,36 +1,21 @@ package eu.dnetlib.dhp.actionmanager.project.utils; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; +import java.io.*; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of a csv file and writes the Serialization of it in hdfs */ -public class ReadCSV implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - - private final BufferedWriter writer; - private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final String csvFile; - private final char delimiter; +public class ReadCSV { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -50,57 +35,18 @@ public class ReadCSV implements Closeable { char del = ';'; if (delimiter.isPresent()) del = delimiter.get().charAt(0); - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) { - log.info("Getting CSV file..."); - readCSV.execute(classForName); - } - - } - - public void execute(final String classForName) - throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { - CSVParser csvParser = new CSVParser(); - csvParser - .parse(csvFile, classForName, delimiter) - .stream() - .forEach(this::write); - } - - @Override - public void close() throws IOException { - writer.close(); - } - - public ReadCSV( - final String hdfsPath, - final String hdfsNameNode, - final String fileURL, - char delimiter) - throws Exception { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); - HttpConnector2 httpConnector = new HttpConnector2(); + FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); + BufferedReader reader = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))); - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - final FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del); - this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); - this.csvFile = httpConnector.getInputSource(fileURL); - this.delimiter = delimiter; - } + reader.close(); - protected void write(final Object p) { - try { - writer.write(OBJECT_MAPPER.writeValueAsString(p)); - writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); - } } } From 7a789423aa37fa3f207f317b2f9181c4420c3fe5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:04:27 +0200 Subject: [PATCH 193/287] GetCSV refactoring - refactoring due to movement of classes --- .../eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java | 2 +- .../eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 40cf5ee53..686b0fc7f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index b1a381415..8efc76a0e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; From f0845e9865b5714c8e1f14ffed9eddfc4b3cafd6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:04:58 +0200 Subject: [PATCH 194/287] GetCSV refactoring - refactoring due to movement of classes --- .../dhp/actionmanager/project/SparkAtomicActionJob.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index a4a0bf6a4..cc1411b31 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Arrays; -import java.util.HashMap; import java.util.Objects; import java.util.Optional; @@ -22,9 +21,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; -import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; From 335a824e348c9e1eaf81d6c021f48323a7e599d6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:10:10 +0200 Subject: [PATCH 195/287] GetCSV refactoring - fixed issue --- .../eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 95c6f987a..a520176f4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -32,7 +32,7 @@ public class EXCELParser { XSSFSheet sheet = wb.getSheet(sheetName); - if (sheetName == null) { + if (sheet == null) { throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } From ab8abd61bbf29b6f6c1ae93b51eb63fc3f219ab8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:11:07 +0200 Subject: [PATCH 196/287] GetCSV refactoring - refactoring due to movement of classes --- .../dhp/actionmanager/project/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index e4f2715fb..bd864a6aa 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -58,7 +58,7 @@ --hdfsNameNode${nameNode} --fileURL${projectFileURL} --hdfsPath${workingDir}/projects - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProject + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject @@ -70,7 +70,7 @@ --hdfsNameNode${nameNode} --fileURL${programmeFileURL} --hdfsPath${workingDir}/programme - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme @@ -83,7 +83,7 @@ --fileURL${topicFileURL} --hdfsPath${workingDir}/topic --sheetName${sheetName} - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic From 9da74b544adf252f84cec924598d88e71293b015 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:12:15 +0200 Subject: [PATCH 197/287] GetCSV refactoring - refactoring due to movement of classes --- .../dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 6 +++--- .../actionmanager/project/PrepareH2020ProgrammeTest.java | 2 +- .../dhp/actionmanager/project/PrepareProjectTest.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index cc36421a0..d27a732ea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -13,8 +13,8 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; @Disabled public class EXCELParserTest { @@ -25,7 +25,7 @@ public class EXCELParserTest { @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + workingDir = Files.createTempDirectory(EXCELParserTest.class.getSimpleName()); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index f128b5610..680872126 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; public class PrepareH2020ProgrammeTest { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index f0f3532aa..ee5f78b0c 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; public class PrepareProjectTest { From d57b2bb9273c33d3348a3abf282245eaec1e2ae2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:12:51 +0200 Subject: [PATCH 198/287] GetCSV refactoring - removing not needed dependency --- dhp-workflows/dhp-aggregation/pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 294287008..98e22d8a3 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -84,14 +84,6 @@ json - - - org.apache.commons - commons-csv - 1.8 - - - org.apache.poi From 6b9e1bf2e3787bb5eb802508721a174981b7bffe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:17:50 +0200 Subject: [PATCH 199/287] GetCSV refactoring - removing not needed dependency --- .../java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 549169673..dbf053a72 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -6,6 +6,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; + public abstract class ReportingJob { /** From 8769dd8eef6668bbce2e238175f2d36948c2b258 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:20:56 +0200 Subject: [PATCH 200/287] GetCSV refactoring - refactoring due to movement of classes --- .../java/eu/dnetlib/dhp/collection/CollectorWorker.java | 4 +++- .../dnetlib/dhp/collection/CollectorWorkerApplication.java | 4 +++- .../eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java | 4 ++-- .../collection/plugin/mongodb/MDStoreCollectorPlugin.java | 4 ++-- .../plugin/mongodb/MongoDbDumpCollectorPlugin.java | 4 ++-- .../dhp/collection/plugin/oai/OaiCollectorPlugin.java | 6 +++--- .../eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 6 +++--- .../dhp/collection/plugin/oai/OaiIteratorFactory.java | 6 +++--- .../dhp/collection/plugin/rest/RestCollectorPlugin.java | 6 +++--- .../eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java | 4 ++-- .../dnetlib/dhp/transformation/TransformSparkJobNode.java | 2 +- .../eu/dnetlib/dhp/collection/CollectionWorkflowTest.java | 1 + .../dhp/collection/plugin/rest/RestCollectorPluginTest.java | 6 +++--- .../dhp/collection/plugin/rest/RestIteratorTest.java | 2 +- 14 files changed, 32 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index d0872da1d..2ea3f35cc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.aggregation.common.ReporterCallback; import eu.dnetlib.dhp.aggregation.common.ReportingJob; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; @@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; public class CollectorWorker extends ReportingJob { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java index 545cbab0c..708d2da65 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java @@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 457f63468..841d42fea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin; import java.util.stream.Stream; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public interface CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index 549c59720..ad28a7261 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -13,11 +13,11 @@ import org.slf4j.LoggerFactory; import com.mongodb.client.MongoCollection; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.common.MdstoreClient; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public class MDStoreCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index ec5bab448..036404141 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.utils.DHPUtils; public class MongoDbDumpCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 9918e4abe..878e286e0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -13,11 +13,11 @@ import com.google.common.base.Splitter; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; public class OaiCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 331dee6b4..3f767fd31 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -19,10 +19,10 @@ import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.collection.XmlCleaner; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; public class OaiIterator implements Iterator { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java index 48f6a94c8..1838223c2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.util.Iterator; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; -import eu.dnetlib.dhp.collection.HttpClientParams; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; public class OaiIteratorFactory { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java index be2bbcece..997948687 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java @@ -9,11 +9,11 @@ import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * TODO: delegate HTTP requests to the common HttpConnector2 implementation. diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index a90d259b4..64a041fd4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -30,9 +30,9 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.JsonUtils; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * log.info(...) equal to log.trace(...) in the application-logs diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index a01703675..4fe79bf76 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -23,8 +23,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java index 05dd6b1d3..1a10b5f64 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java @@ -18,6 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index f2b873e10..f708c367b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -12,10 +12,10 @@ import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * @author js, Andreas Czerniak diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java index 9f75bd468..906f69dc9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java @@ -9,7 +9,7 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * From ed183d878e5899aa998d3dd6b0de0ffa10353752 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 09:28:51 +0200 Subject: [PATCH 201/287] GetCSV refactoring - modified test classes due to change in the model of projects and programme --- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../project/PrepareProjectTest.java | 1 + .../project/SparkUpdateProjectTest.java | 2 +- .../project/preparedProgramme_whole.json | 277 ++++++++++++++++++ .../project/prepared_projects.json | 34 +-- .../project/projects_subset.json | 32 +- .../project/whole_programme.json.gz | Bin 31593 -> 29456 bytes 7 files changed, 313 insertions(+), 35 deletions(-) create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 680872126..68aacdc8b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -92,7 +92,7 @@ public class PrepareH2020ProgrammeTest { Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count()); - // tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); + //tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index ee5f78b0c..d0c50a054 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index a77dbace4..58365e026 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -78,7 +78,7 @@ public class SparkUpdateProjectTest { "-programmePath", getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz") + "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json") .getPath(), "-projectPath", getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json new file mode 100644 index 000000000..e8e04e8db --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json @@ -0,0 +1,277 @@ +{"code":"H2020-EU.5.g.","title":"Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","classification_short":"Science with and for Society | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts"} +{"code":"H2020-EU.3.4.2.1.","title":"A substantial reduction of traffic congestion","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | A substantial reduction of traffic congestion","classification_short":"Societal Challenges | Transport | Mobility, safety and security | A substantial reduction of traffic congestion"} +{"code":"H2020-EU.3.4.5.4.","title":"ITD Airframe","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Airframe","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Airframe"} +{"code":"H2020-EU.3.3.8.1.","title":"Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs"} +{"code":"H2020-EU.3.7.1.","title":"Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","classification_short":"Societal Challenges | Secure societies | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs"} +{"code":"H2020-EU.3.4.1.1.","title":"Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration"} +{"code":"H2020-EU.1.4.3.","title":"Reinforcing European research infrastructure policy and international cooperation","shortTitle":"Research infrastructure policy and international cooperation","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation"} +{"code":"H2020-EU.1.4.","title":"EXCELLENT SCIENCE - Research Infrastructures","shortTitle":"Research Infrastructures","language":"en","classification":"Excellent science | Research Infrastructures","classification_short":"Excellent Science | Research Infrastructures"} +{"code":"H2020-EU.3.4.6.1.","title":"Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies"} +{"code":"H2020-EU.3.4.5.5.","title":"ITD Engines","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Engines","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Engines"} +{"code":"H2020-EU.2.1.1.7.3.","title":"Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking"} +{"code":"H2020-EU.3.1.6.1.","title":"Promoting integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Promoting integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Promoting integrated care"} +{"code":"H2020-EU.3.7.6.","title":"Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","classification_short":"Societal Challenges | Secure societies | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management"} +{"code":"H2020-EU.3.4.2.3.","title":"Developing new concepts of freight transport and logistics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Developing new concepts of freight transport and logistics","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Developing new concepts of freight transport and logistics"} +{"code":"H2020-EU.3.3.2.1.","title":"Develop the full potential of wind energy","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy"} +{"code":"H2020-EU.3.2.5.","title":"Cross-cutting marine and maritime research","shortTitle":"Cross-cutting marine and maritime research","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research"} +{"code":"H2020-EU.3.4.7.","title":"SESAR JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU","classification_short":"Societal Challenges | Transport | SESAR JU"} +{"code":"H2020-EU.2.1.3.3.","title":"Management of materials components","shortTitle":"Management of materials components","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Management of materials components","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Management of materials components"} +{"code":"H2020-EU.3.3.3.","title":"Alternative fuels and mobile energy sources","shortTitle":"Alternative fuels and mobile energy sources","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources"} +{"code":"H2020-EU.7.","title":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","shortTitle":"European Institute of Innovation and Technology (EIT)","language":"en","classification":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","classification_short":"European Institute of Innovation and Technology (EIT)"} +{"code":"H2020-EU.3.5.4.1.","title":"Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake"} +{"code":"H2020-EU.3.1.4.","title":"Active ageing and self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health"} +{"code":"H2020-EU.1.","title":"Excellent science","shortTitle":"Excellent Science","language":"en","classification":"Excellent science","classification_short":"Excellent Science"} +{"code":"H2020-EU.3.5.6.1.","title":"Identifying resilience levels via observations, monitoring and modelling","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling"} +{"code":"H2020-EU.3.2.4.3.","title":"Supporting market development for bio-based products and processes","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Supporting market development for bio-based products and processes","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Supporting market development for bio-based products and processes"} +{"code":"H2020-EU.2.1.6.1.","title":"Enabling European competitiveness, non-dependence and innovation of the European space sector","shortTitle":"Competitiveness, non-dependence and innovation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation"} +{"code":"H2020-EU.4.b.","title":"Twinning of research institutions","shortTitle":"Twinning of research institutions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Twinning of research institutions","classification_short":"Spreading excellence and widening participation | Twinning of research institutions"} +{"code":"H2020-EU.3.1.7.6.","title":"Psychiatric diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases"} +{"code":"H2020-EU.3.1.2.2.","title":"Improving diagnosis and prognosis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Improving diagnosis and prognosis","classification_short":"Societal Challenges | Health | Preventing disease | Improving diagnosis and prognosis"} +{"code":"H2020-EU.3.4.5.3.","title":"IADP Fast Rotorcraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Fast Rotorcraft"} +{"code":"H2020-EU.3.1.3.1.","title":"Treating disease, including developing regenerative medicine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine","classification_short":"Societal Challenges | Health | Treating and managing disease | Treating disease, including developing regenerative medicine"} +{"code":"H2020-EU.3.4.3.3.","title":"Advanced production processes","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Advanced production processes","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Advanced production processes"} +{"code":"H2020-EU.3.1.7.","title":"Innovative Medicines Initiative 2 (IMI2)","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2)","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2)"} +{"code":"H2020-EU.3.6.3.2.","title":"Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity"} +{"code":"H2020-EU.3.5.1.2.","title":"Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures"} +{"code":"H2020-EU.3.6.1.","title":"Inclusive societies","shortTitle":"Inclusive societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies"} +{"code":"H2020-EU.3.2.","title":"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","shortTitle":"Food, agriculture, forestry, marine research and bioeconomy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy"} +{"code":"H2020-EU.2.1.6.1.2.","title":"Boost innovation between space and non-space sectors","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Boost innovation between space and non-space sectors","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Boost innovation between space and non-space sectors"} +{"code":"H2020-EU.2.1.3.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced materials","shortTitle":"Advanced materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials"} +{"code":"H2020-EU.2.1.2.3.","title":"Developing the societal dimension of nanotechnology","shortTitle":"Societal dimension of nanotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing the societal dimension of nanotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Societal dimension of nanotechnology"} +{"code":"H2020-EU.4.","title":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","shortTitle":"Spreading excellence and widening participation","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","classification_short":"Spreading excellence and widening participation"} +{"code":"H2020-EU.3.6.1.2.","title":"Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change"} +{"code":"H2020-EU.3.4.2.","title":"Better mobility, less congestion, more safety and security","shortTitle":"Mobility, safety and security","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security","classification_short":"Societal Challenges | Transport | Mobility, safety and security"} +{"code":"H2020-EU.3.1.7.13.","title":"Other","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Other","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Other"} +{"code":"H2020-EU.3.3.3.3.","title":"New alternative fuels","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | New alternative fuels","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | New alternative fuels"} +{"code":"H2020-EU.2.1.3.5.","title":"Materials for creative industries, including heritage","shortTitle":"Materials for creative industries, including heritage","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for creative industries, including heritage","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for creative industries, including heritage"} +{"code":"H2020-EU.3.3.3.2.","title":"Reducing time to market for hydrogen and fuel cells technologies","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies"} +{"code":"H2020-EU.5.d.","title":"Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","classification_short":"Science with and for Society | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels"} +{"code":"H2020-EU.3.1.","title":"SOCIETAL CHALLENGES - Health, demographic change and well-being","shortTitle":"Health","language":"en","classification":"Societal challenges | Health, demographic change and well-being","classification_short":"Societal Challenges | Health"} +{"code":"H2020-EU.3.5.3.1.","title":"Improve the knowledge base on the availability of raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials"} +{"code":"H2020-EU.3.2.1.4.","title":"Sustainable forestry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Sustainable forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Sustainable forestry"} +{"code":"H2020-EU.3.3.","title":"SOCIETAL CHALLENGES - Secure, clean and efficient energy","shortTitle":"Energy","language":"en","classification":"Societal challenges | Secure, clean and efficient energy","classification_short":"Societal Challenges | Energy"} +{"code":"H2020-EU.3.4.8.1.","title":"Innovation Programme 1 (IP1): Cost-efficient and reliable trains","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains"} +{"code":"H2020-EU.2.3.2.1.","title":"Support for research intensive SMEs","shortTitle":"Support for research intensive SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Support for research intensive SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Support for research intensive SMEs"} +{"code":"H2020-EU.2.1.3.2.","title":"Materials development and transformation","shortTitle":"Materials development and transformation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials development and transformation","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials development and transformation"} +{"code":"H2020-EU.1.4.1.3.","title":"Development, deployment and operation of ICT-based e-infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures"} +{"code":"H2020-EU.3.5.4.2.","title":"Support innovative policies and societal changes","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Support innovative policies and societal changes","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Support innovative policies and societal changes"} +{"code":"H2020-EU.2.1.3.6.","title":"Metrology, characterisation, standardisation and quality control","shortTitle":"Metrology, characterisation, standardisation and quality control","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Metrology, characterisation, standardisation and quality control","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Metrology, characterisation, standardisation and quality control"} +{"code":"H2020-EU.3.4.5.8.","title":"ECO Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ECO Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ECO Transverse Area"} +{"code":"H2020-EU.5.f.","title":"Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","classification_short":"Science with and for Society | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation"} +{"code":"H2020-EU.5.h.","title":"Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","classification_short":"Science with and for Society | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public"} +{"code":"H2020-EU.2.1.1.7.1.","title":"Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration"} +{"code":"H2020-EU.3.7.5.","title":"Increase Europe's resilience to crises and disasters","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Increase Europe's resilience to crises and disasters","classification_short":"Societal Challenges | Secure societies | Increase Europe's resilience to crises and disasters"} +{"code":"H2020-EU.1.4.2.2.","title":"Strengthening the human capital of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Strengthening the human capital of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Strengthening the human capital of research infrastructures"} +{"code":"H2020-EU.3.4.1.2.","title":"Developing smart equipment, infrastructures and services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services"} +{"code":"H2020-EU.2.3.2.2.","title":"Enhancing the innovation capacity of SMEs","shortTitle":"Enhancing the innovation capacity of SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Enhancing the innovation capacity of SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Enhancing the innovation capacity of SMEs"} +{"code":"H2020-EU.1.3.5.","title":"Specific support and policy actions","shortTitle":"MSCA Specific support","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Specific support and policy actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Specific support"} +{"code":"H2020-EU.3.2.3.3.","title":"Boosting marine and maritime innovation through biotechnology","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology"} +{"code":"H2020-EU.3.2.1.2.","title":"Providing ecosystems services and public goods","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods"} +{"code":"H2020-EU.2.3.2.3.","title":"Supporting market-driven innovation","shortTitle":"Supporting market-driven innovation","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Supporting market-driven innovation","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Supporting market-driven innovation"} +{"code":"H2020-EU.5.a.","title":"Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","classification_short":"Science with and for Society | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations"} +{"code":"H2020-EU.3.1.7.9.","title":"Ageing-associated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases"} +{"code":"H2020-EU.2.2.1.","title":"The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","shortTitle":"Debt facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Debt facility"} +{"code":"H2020-Euratom-1.8.","title":"Ensure availability and use of research infrastructures of pan_european relevance","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance","classification_short":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance"} +{"code":"H2020-EU.3.2.2.1.","title":"Informed consumer choices","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices"} +{"code":"H2020-EU.3.7.","title":"Secure societies - Protecting freedom and security of Europe and its citizens","shortTitle":"Secure societies","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens","classification_short":"Societal Challenges | Secure societies"} +{"code":"H2020-EU.1.3.4.","title":"Increasing structural impact by co-funding activities","shortTitle":"MSCA Co-funding","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Increasing structural impact by co-funding activities","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Co-funding"} +{"code":"H2020-EU.2.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies","shortTitle":"Leadership in enabling and industrial technologies (LEIT)","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT)"} +{"code":"H2020-EU.2.1.3.4.","title":"Materials for a sustainable, resource-efficient and low-emission industry","shortTitle":"Materials for a resource-efficient and low-emission industry","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for a sustainable, resource-efficient and low-emission industry","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for a resource-efficient and low-emission industry"} +{"code":"H2020-EU.3.4.5.7.","title":"Small Air Transport (SAT) Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area"} +{"code":"H2020-EU.3.4.8.3.","title":"Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure"} +{"code":"H2020-Euratom-1.1.","title":"Support safe operation of nuclear systems","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support safe operation of nuclear systems","classification_short":"Euratom | Indirect actions | Support safe operation of nuclear systems"} +{"code":"H2020-EU.2.3.1.","title":" Mainstreaming SME support, especially through a dedicated instrument","shortTitle":"Mainstreaming SME support","language":"en","classification":"Industrial leadership | Innovation In SMEs | Mainstreaming SME support, especially through a dedicated instrument","classification_short":"Industrial Leadership | Innovation in SMEs | Mainstreaming SME support"} +{"code":"H2020-EU.1.4.3.1.","title":"Reinforcing European policy for research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures"} +{"code":"H2020-Euratom-1.3.","title":"Support the development and sustainability of nuclear competences at Union level","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level","classification_short":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level"} +{"code":"H2020-EU.3.1.7.1.","title":"Antimicrobial resistance","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance"} +{"code":"H2020-EU.3.7.4.","title":"Improve cyber security","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Improve cyber security","classification_short":"Societal Challenges | Secure societies | Improve cyber security"} +{"code":"H2020-EU.2.1.1.7.2.","title":"Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services"} +{"code":"H2020-EU.3.5.4.","title":"Enabling the transition towards a green economy and society through eco-innovation","shortTitle":"A green economy and society through eco-innovation","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation"} +{"code":"H2020-EU.3.5.3.2.","title":"Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery"} +{"code":"H2020-EU.3.4.5.10.","title":"Thematic Topics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Thematic Topics","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Thematic Topics"} +{"code":"H2020-EU.3.1.5.1.","title":"Improving halth information and better use of health data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving halth information and better use of health data","classification_short":"Societal Challenges | Health | Methods and data | Improving halth information and better use of health data"} +{"code":"H2020-EU.3.3.3.1.","title":"Make bio-energy more competitive and sustainable","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable"} +{"code":"H2020-EU.3.6.2.1.","title":"Strengthen the evidence base and support for the Innovation Union and ERA","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA"} +{"code":"H2020-EU.3.1.7.12.","title":"Vaccine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Vaccine","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Vaccine"} +{"code":"H2020-EU.3.5.4.3.","title":"Measure and assess progress towards a green economy","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Measure and assess progress towards a green economy","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Measure and assess progress towards a green economy"} +{"code":"H2020-EU.3.4.8.5.","title":"Innovation Programme 5: Technologies for sustainable and attractive European rail freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight"} +{"code":"H2020-EU.3.5.4.4.","title":"Foster resource efficiency through digital systems","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Foster resource efficiency through digital systems","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Foster resource efficiency through digital systems"} +{"code":"H2020-EU.3.3.8.3.","title":"Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources"} +{"code":"H2020-Euratom","title":"Euratom","shortTitle":"","language":"en","classification":"Euratom","classification_short":"Euratom"} +{"code":"H2020-EU.3.5.6.2.","title":"Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards"} +{"code":"H2020-EU.3.2.5.2.","title":"Develop the potential of marine resources through an integrated approach","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach"} +{"code":"H2020-EU.2.1.1.5.","title":"Advanced interfaces and robots: Robotics and smart spaces","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Advanced interfaces and robots: Robotics and smart spaces","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Advanced interfaces and robots: Robotics and smart spaces"} +{"code":"H2020-EU.3.3.5.","title":"New knowledge and technologies","shortTitle":"New knowledge and technologies","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | New knowledge and technologies","classification_short":"Societal Challenges | Energy | New knowledge and technologies"} +{"code":"H2020-EU.1.2.2.","title":"FET Proactive","shortTitle":"FET Proactive","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Proactive","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Proactive"} +{"code":"H2020-EU.3.6.1.3.","title":"Europe's role as a global actor, notably regarding human rights and global justice","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice"} +{"code":"H2020-EU.2.1.4.1.","title":"Boosting cutting-edge biotechnologies as a future innovation driver","shortTitle":"Cutting-edge biotechnologies as future innovation driver","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Boosting cutting-edge biotechnologies as a future innovation driver","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Cutting-edge biotechnologies as future innovation driver"} +{"code":"H2020-EU.3.1.3.","title":"Treating and managing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease","classification_short":"Societal Challenges | Health | Treating and managing disease"} +{"code":"H2020-EU.3.3.4.","title":"A single, smart European electricity grid","shortTitle":"A single, smart European electricity grid","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | A single, smart European electricity grid","classification_short":"Societal Challenges | Energy | A single, smart European electricity grid"} +{"code":"H2020-EU.3.2.6.","title":"Bio-based Industries Joint Technology Initiative (BBI-JTI)","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)"} +{"code":"H2020-EU.1.3.2.","title":"Nurturing excellence by means of cross-border and cross-sector mobility","shortTitle":"MSCA Mobility","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Nurturing excellence by means of cross-border and cross-sector mobility","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Mobility"} +{"code":"H2020-EU.2.1.3.7.","title":"Optimisation of the use of materials","shortTitle":"Optimisation of the use of materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Optimisation of the use of materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Optimisation of the use of materials"} +{"code":"H2020-EU.2.1.2.4.","title":"Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","shortTitle":"Synthesis and manufacturing of nanomaterials, components and systems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Synthesis and manufacturing of nanomaterials, components and systems"} +{"code":"H2020-EU.1.4.1.","title":"Developing the European research infrastructures for 2020 and beyond","shortTitle":"Research infrastructures for 2020 and beyond","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond"} +{"code":"H2020-EU.3.1.1.1.","title":"Understanding the determinants of health, improving health promotion and disease prevention","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention"} +{"code":"H2020-EU.5.c.","title":"Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","classification_short":"Science with and for Society | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology"} +{"code":"H2020-EU.5.","title":"SCIENCE WITH AND FOR SOCIETY","shortTitle":"Science with and for Society","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY","classification_short":"Science with and for Society"} +{"code":"H2020-EU.3.5.3.3.","title":"Find alternatives for critical raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials"} +{"code":"H2020-EU.3.2.3.1.","title":"Developing sustainable and environmentally-friendly fisheries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries"} +{"code":"H2020-EU.2.1.2.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Nanotechnologies","shortTitle":"Nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies"} +{"code":"H2020-EU.3.4.3.2.","title":"On board, smart control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | On board, smart control systems","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | On board, smart control systems"} +{"code":"H2020-EU.3.2.4.1.","title":"Fostering the bio-economy for bio-based industries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Fostering the bio-economy for bio-based industries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Fostering the bio-economy for bio-based industries"} +{"code":"H2020-EU.3.1.6.2.","title":"Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches"} +{"code":"H2020-EU.2.1.5.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced manufacturing and processing","shortTitle":"Advanced manufacturing and processing","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing"} +{"code":"H2020-EU.3.5.2.2.","title":"Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services"} +{"code":"H2020-EU.3.1.7.3.","title":"Cardiovascular diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases"} +{"code":"H2020-EU.3.3.8.2.","title":"Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market"} +{"code":"H2020-EU.2.1.6.3.","title":"Enabling exploitation of space data","shortTitle":"Enabling exploitation of space data","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling exploitation of space data","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling exploitation of space data"} +{"code":"H2020-EU.2.1.2.5.","title":"Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","shortTitle":"Capacity-enhancing techniques, measuring methods and equipment","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Capacity-enhancing techniques, measuring methods and equipment"} +{"code":"H2020-EU.3.6.2.","title":"Innovative societies","shortTitle":"Innovative societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies"} +{"code":"H2020-EU.3.1.2.1.","title":"Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","classification_short":"Societal Challenges | Health | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility"} +{"code":"H2020-EU.3.6.1.4.","title":"The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design"} +{"code":"H2020-EU.3.3.2.4.","title":"Develop geothermal, hydro, marine and other renewable energy options","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options"} +{"code":"H2020-EU.5.b.","title":"Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","classification_short":"Science with and for Society | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities"} +{"code":"H2020-EU.1.3.3.","title":"Stimulating innovation by means of cross-fertilisation of knowledge","shortTitle":"MSCA Knowledge","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Stimulating innovation by means of cross-fertilisation of knowledge","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Knowledge"} +{"code":"H2020-EU.3.1.4.2.","title":"Individual awareness and empowerment for self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health"} +{"code":"H2020-EU.3.1.7.8.","title":"Immune-mediated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases"} +{"code":"H2020-EU.3.4.","title":"SOCIETAL CHALLENGES - Smart, Green And Integrated Transport","shortTitle":"Transport","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport","classification_short":"Societal Challenges | Transport"} +{"code":"H2020-EU.3.2.6.1.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy"} +{"code":"H2020-EU.2.1.2.1.","title":"Developing next generation nanomaterials, nanodevices and nanosystems ","shortTitle":"Next generation nanomaterials, nanodevices and nanosystems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing next generation nanomaterials, nanodevices and nanosystems ","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Next generation nanomaterials, nanodevices and nanosystems"} +{"code":"H2020-Euratom-1.5.","title":"Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","classification_short":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities"} +{"code":"H2020-EU.3.5.","title":"SOCIETAL CHALLENGES - Climate action, Environment, Resource Efficiency and Raw Materials","shortTitle":"Climate and environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials","classification_short":"Societal Challenges | Climate and environment"} +{"code":"H2020-EU.2.1.1.6.","title":"Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies"} +{"code":"H2020-EU.3.4.2.4.","title":"Reducing accident rates, fatalities and casualties and improving security","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Reducing accident rates, fatalities and casualties and improving security","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Reducing accident rates, fatalities and casualties and improving security"} +{"code":"H2020-EU.3.6.2.2.","title":"Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail"} +{"code":"H2020-EU.3.5.1.1.","title":"Improve the understanding of climate change and the provision of reliable climate projections","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections"} +{"code":"H2020-EU.3.4.3.4.","title":"Exploring entirely new transport concepts","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts"} +{"code":"H2020-EU.3.5.2.1.","title":"Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being"} +{"code":"H2020-EU.3.2.2.3.","title":"A sustainable and competitive agri-food industry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry"} +{"code":"H2020-EU.1.4.1.1.","title":"Developing new world-class research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Developing new world-class research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Developing new world-class research infrastructures"} +{"code":"H2020-EU.3.1.2.3.","title":"Developing better preventive and therapeutic vaccines","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing better preventive and therapeutic vaccines","classification_short":"Societal Challenges | Health | Preventing disease | Developing better preventive and therapeutic vaccines"} +{"code":"H2020-EU.1.4.3.2.","title":"Facilitate strategic international cooperation","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Facilitate strategic international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Facilitate strategic international cooperation"} +{"code":"H2020-EU.3.5.2.","title":"Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","shortTitle":"Protection of the environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","classification_short":"Societal Challenges | Climate and environment | Protection of the environment"} +{"code":"H2020-Euratom-1.9.","title":"European Fusion Development Agreement","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | European Fusion Development Agreement","classification_short":"Euratom | Indirect actions | European Fusion Development Agreement"} +{"code":"H2020-EU.3.2.1.1.","title":"Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience"} +{"code":"H2020-EU.3.2.2.2.","title":"Healthy and safe foods and diets for all","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all"} +{"code":"H2020-EU.2.1.4.2.","title":"Bio-technology based industrial products and processes","shortTitle":"Bio-technology based industrial products and processes","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Bio-technology based industrial products and processes","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Bio-technology based industrial products and processes"} +{"code":"H2020-EU.3.4.5.1.","title":"IADP Large Passenger Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Large Passenger Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Large Passenger Aircraft"} +{"code":"H2020-EU.3.1.1.3.","title":"Improving surveillance and preparedness","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Improving surveillance and preparedness","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Improving surveillance and preparedness"} +{"code":"H2020-EU.2.1.6.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Space","shortTitle":"Space","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space"} +{"code":"H2020-EU.3.1.5.2.","title":"Improving scientific tools and methods to support policy making and regulatory needs","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs","classification_short":"Societal Challenges | Health | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs"} +{"code":"H2020-EU.3.","title":"Societal challenges","shortTitle":"Societal Challenges","language":"en","classification":"Societal challenges","classification_short":"Societal Challenges"} +{"code":"H2020-EU.1.3.","title":"EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions","shortTitle":"Marie-Sklodowska-Curie Actions","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions"} +{"code":"H2020-EU.4.f.","title":"Strengthening the administrative and operational capacity of transnational networks of National Contact Points","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Strengthening the administrative and operational capacity of transnational networks of National Contact Points","classification_short":"Spreading excellence and widening participation | Strengthening the administrative and operational capacity of transnational networks of National Contact Points"} +{"code":"H2020-EU.1.2.","title":"EXCELLENT SCIENCE - Future and Emerging Technologies (FET)","shortTitle":"Future and Emerging Technologies (FET)","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET)","classification_short":"Excellent Science | Future and Emerging Technologies (FET)"} +{"code":"H2020-EU.3.3.1.1.","title":"Bring to mass market technologies and services for a smart and efficient energy use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Bring to mass market technologies and services for a smart and efficient energy use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Bring to mass market technologies and services for a smart and efficient energy use"} +{"code":"H2020-EU.3.3.2.2.","title":"Develop efficient, reliable and cost-competitive solar energy systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems"} +{"code":"H2020-EU.4.c.","title":"Establishing ‚ERA Chairs’","shortTitle":"ERA chairs","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Establishing ‚ERA Chairs’","classification_short":"Spreading excellence and widening participation | ERA chairs"} +{"code":"H2020-EU.3.4.5","title":"CLEANSKY2","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2","classification_short":"Societal Challenges | Transport | CLEANSKY2"} +{"code":"H2020-EU.3.4.5.2.","title":"IADP Regional Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Regional Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Regional Aircraft"} +{"code":"H2020-EU.3.5.1.","title":"Fighting and adapting to climate change","shortTitle":"Fighting and adapting to climate change","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change"} +{"code":"H2020-EU.3.3.1.","title":"Reducing energy consumption and carbon foorpint by smart and sustainable use","shortTitle":"Reducing energy consumption and carbon footprint","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint"} +{"code":"H2020-EU.3.4.1.","title":"Resource efficient transport that respects the environment","shortTitle":"Resource efficient transport that respects the environment","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment"} +{"code":"H2020-EU.3.2.6.2.","title":"Fostering the bio-economy for bio-based industrie","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie"} +{"code":"H2020-EU.3.4.7.1","title":"Exploratory Research","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU | Exploratory Research","classification_short":"Societal Challenges | Transport | SESAR JU | Exploratory Research"} +{"code":"H2020-EU.1.2.1.","title":"FET Open","shortTitle":"FET Open","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Open","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Open"} +{"code":"H2020-EU.3.4.3.1.","title":"Developing the next generation of transport means as the way to secure market share in the future","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future"} +{"code":"H2020-EU.3.2.4.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","shortTitle":"Bio-based industries and supporting bio-economy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy"} +{"code":"H2020-EC","title":"Horizon 2020 Framework Programme","shortTitle":"EC Treaty","language":"en","classification":"Horizon 2020 Framework Programme","classification_short":"EC Treaty"} +{"code":"H2020-EU.3.6.2.4.","title":"Promote coherent and effective cooperation with third countries","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Promote coherent and effective cooperation with third countries","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Promote coherent and effective cooperation with third countries"} +{"code":"H2020-EU.3.1.7.5.","title":"Neurodegenerative diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases"} +{"code":"H2020-EU.2.1.6.4.","title":"Enabling European research in support of international space partnerships","shortTitle":"Research in support of international space partnerships","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European research in support of international space partnerships","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Research in support of international space partnerships"} +{"code":"H2020-EU.2.1.5.1.","title":"Technologies for Factories of the Future","shortTitle":"Technologies for Factories of the Future","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies for Factories of the Future","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies for Factories of the Future"} +{"code":"H2020-EU.2.3.2.","title":"Specific support","shortTitle":"","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support"} +{"code":"H2020-EU.1.4.2.","title":"Fostering the innovation potential of research infrastructures and their human resources","shortTitle":"Research infrastructures and their human resources","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources"} +{"code":"H2020-EU.3.3.1.2.","title":"Unlock the potential of efficient and renewable heating-cooling systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Unlock the potential of efficient and renewable heating-cooling systems","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Unlock the potential of efficient and renewable heating-cooling systems"} +{"code":"H2020-EU.3.2.3.2.","title":"Developing competitive and environmentally-friendly European aquaculture","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture"} +{"code":"H2020-EU.3.2.1.3.","title":"Empowerment of rural areas, support to policies and rural innovation","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation"} +{"code":"H2020-EU.3.2.5.3.","title":"Cross-cutting concepts and technologies enabling maritime growth","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth"} +{"code":"H2020-EU.2.1.3.1.","title":"Cross-cutting and enabling materials technologies","shortTitle":"Cross-cutting and enabling materials technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Cross-cutting and enabling materials technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Cross-cutting and enabling materials technologies"} +{"code":"H2020-EU.3.1.1.2.","title":"Understanding disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding disease"} +{"code":"H2020-Euratom-1.6.","title":"Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","classification_short":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design"} +{"code":"H2020-EU.3.5.7.1.","title":"Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements"} +{"code":"H2020-EU.2.2.","title":"INDUSTRIAL LEADERSHIP - Access to risk finance","shortTitle":"Access to risk finance","language":"en","classification":"Industrial leadership | Access to risk finance","classification_short":"Industrial Leadership | Access to risk finance"} +{"code":"H2020-EU.3.4.6.","title":"FCH2 (transport objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives)","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives)"} +{"code":"H2020-EU.4.d.","title":"A Policy Support Facility","shortTitle":"Policy Support Facility (PSF)","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | A Policy Support Facility","classification_short":"Spreading excellence and widening participation | Policy Support Facility (PSF)"} +{"code":"H2020-EU.2.1.1.7.","title":"ECSEL","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL"} +{"code":"H2020-EU.3.1.5.","title":"Methods and data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data","classification_short":"Societal Challenges | Health | Methods and data"} +{"code":"H2020-EU.3.7.7.","title":"Enhance stadardisation and interoperability of systems, including for emergency purposes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Enhance stadardisation and interoperability of systems, including for emergency purposes","classification_short":"Societal Challenges | Secure societies | Enhance stadardisation and interoperability of systems, including for emergency purposes"} +{"code":"H2020-Euratom-1.7.","title":"Promote innovation and industry competitiveness","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Promote innovation and industry competitiveness","classification_short":"Euratom | Indirect actions | Promote innovation and industry competitiveness"} +{"code":"H2020-EU.2.1.5.3.","title":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","shortTitle":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries"} +{"code":"H2020-EU.2.1.4.3.","title":"Innovative and competitive platform technologies","shortTitle":"Innovative and competitive platform technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Innovative and competitive platform technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Innovative and competitive platform technologies"} +{"code":"H2020-EU.1.2.3.","title":"FET Flagships","shortTitle":"FET Flagships","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Flagships","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Flagships"} +{"code":"H2020-EU.3.6.3.","title":"Reflective societies - cultural heritage and European identity","shortTitle":"Reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies"} +{"code":"H2020-EU.3.6.3.3.","title":"Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures"} +{"code":"H2020-EU.3.2.4.2.","title":"Developing integrated biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Developing integrated biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Developing integrated biorefineries"} +{"code":"H2020-EU.2.1.6.1.1.","title":"Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems"} +{"code":"H2020-EU.3.1.3.2.","title":"Transferring knowledge to clinical practice and scalable innovation actions","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions","classification_short":"Societal Challenges | Health | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions"} +{"code":"H2020-EU.2.","title":"Industrial leadership","shortTitle":"Industrial Leadership","language":"en","classification":"Industrial leadership","classification_short":"Industrial Leadership"} +{"code":"H2020-EU.3.4.1.3.","title":"Improving transport and mobility in urban areas","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas"} +{"code":"H2020-EU.4.e.","title":"Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","classification_short":"Spreading excellence and widening participation | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks"} +{"code":"H2020-EU.3.2.1.","title":"Sustainable agriculture and forestry","shortTitle":"Sustainable agriculture and forestry","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry"} +{"code":"H2020-EU.3.1.7.7.","title":"Respiratory diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases"} +{"code":"H2020-EU.3.4.8.6.","title":"Cross-cutting themes and activities (CCA)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)"} +{"code":"H2020-EU.3.4.8.4.","title":"Innovation Programme 4: IT Solutions for attractive railway services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services"} +{"code":"H2020-EU.3.2.2.","title":"Sustainable and competitive agri-food sector for a safe and healthy diet","shortTitle":"Sustainable and competitive agri-food sector for a safe and healthy diet","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet"} +{"code":"H2020-EU.3.4.3.","title":"Global leadership for the European transport industry","shortTitle":"Global leadership for the European transport industry","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry"} +{"code":"H2020-EU.1.4.2.1.","title":"Exploiting the innovation potential of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures"} +{"code":"H2020-EU.3.3.2.3.","title":"Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use"} +{"code":"H2020-EU.3.6.3.1.","title":"Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past"} +{"code":"H2020-EU.2.1.2.2.","title":"Ensuring the safe and sustainable development and application of nanotechnologies","shortTitle":"Safe and sustainable nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Ensuring the safe and sustainable development and application of nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Safe and sustainable nanotechnologies"} +{"code":"H2020-EU.3.1.6.","title":"Health care provision and integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care"} +{"code":"H2020-EU.3.4.5.9.","title":"Technology Evaluator","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Technology Evaluator","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Technology Evaluator"} +{"code":"H2020-EU.3.6.","title":"SOCIETAL CHALLENGES - Europe In A Changing World - Inclusive, Innovative And Reflective Societies","shortTitle":"Inclusive, innovative and reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies"} +{"code":"H2020-EU.3.4.8.","title":"Shift2Rail JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU","classification_short":"Societal Challenges | Transport | Shift2Rail JU"} +{"code":"H2020-EU.3.2.6.3.","title":"Sustainable biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries"} +{"code":"H2020-EU.4.a.","title":"Teaming of excellent research institutions and low performing RDI regions","shortTitle":"Teaming of research institutions and low performing regions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Teaming of excellent research institutions and low performing RDI regions","classification_short":"Spreading excellence and widening participation | Teaming of research institutions and low performing regions"} +{"code":"H2020-EU.3.1.7.4.","title":"Diabetes","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Diabetes","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Diabetes"} +{"code":"H2020-EU.3.7.2.","title":"Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","classification_short":"Societal Challenges | Secure societies | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes"} +{"code":"H2020-EU.3.1.2.","title":"Preventing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease","classification_short":"Societal Challenges | Health | Preventing disease"} +{"code":"H2020-EU.3.5.3.4.","title":"Improve societal awareness and skills on raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials"} +{"code":"H2020-EU.3.3.7.","title":"Market uptake of energy innovation - building on Intelligent Energy Europe","shortTitle":"Market uptake of energy innovation","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Market uptake of energy innovation - building on Intelligent Energy Europe","classification_short":"Societal Challenges | Energy | Market uptake of energy innovation"} +{"code":"H2020-EU.2.3.","title":"INDUSTRIAL LEADERSHIP - Innovation In SMEs","shortTitle":"Innovation in SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs","classification_short":"Industrial Leadership | Innovation in SMEs"} +{"code":"H2020-EU.2.1.1.3.","title":"Future Internet: Software, hardware, Infrastructures, technologies and services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Future Internet: Software, hardware, Infrastructures, technologies and services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Future Internet: Software, hardware, Infrastructures, technologies and services"} +{"code":"H2020-EU.3.1.5.3.","title":"Using in-silico medicine for improving disease management and prediction","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Using in-silico medicine for improving disease management and prediction","classification_short":"Societal Challenges | Health | Methods and data | Using in-silico medicine for improving disease management and prediction"} +{"code":"H2020-EU.3.6.1.1.","title":"The mechanisms to promote smart, sustainable and inclusive growth","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth"} +{"code":"H2020-EU.1.3.1.","title":"Fostering new skills by means of excellent initial training of researchers","shortTitle":"MCSA Initial training","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Fostering new skills by means of excellent initial training of researchers","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MCSA Initial training"} +{"code":"H2020-EU.3.6.2.3.","title":"Make use of the innovative, creative and productive potential of all generations","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations"} +{"code":"H2020-EU.3.5.1.3.","title":"Support mitigation policies, including studies that focus on impact from other sectoral policies","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies"} +{"code":"H2020-EU.3.3.1.3.","title":"Foster European Smart cities and Communities","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Foster European Smart cities and Communities","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Foster European Smart cities and Communities"} +{"code":"H2020-EU.3.1.1.","title":"Understanding health, wellbeing and disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease"} +{"code":"H2020-Euratom-1.","title":"Indirect actions","shortTitle":"","language":"en","classification":"Euratom | Indirect actions","classification_short":"Euratom | Indirect actions"} +{"code":"H2020-EU.3.5.7.","title":"FCH2 (raw materials objective)","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective)","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective)"} +{"code":"H2020-EU.3.7.3.","title":"Strengthen security through border management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Strengthen security through border management","classification_short":"Societal Challenges | Secure societies | Strengthen security through border management"} +{"code":"H2020-EU.2.1.1.2.","title":"Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing"} +{"code":"H2020-EU.3.5.5.","title":"Developing comprehensive and sustained global environmental observation and information systems","shortTitle":"Environmental observation and information systems","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Developing comprehensive and sustained global environmental observation and information systems","classification_short":"Societal Challenges | Climate and environment | Environmental observation and information systems"} +{"code":"H2020-EU.3.1.7.10.","title":"Cancer","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cancer","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cancer"} +{"code":"H2020-EU.3.4.8.2.","title":"Innovation Programme 2: Advanced traffic management and control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems"} +{"code":"H2020-EU.5.e.","title":"Develop the accessibility and the use of the results of publicly-funded research","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the accessibility and the use of the results of publicly-funded research","classification_short":"Science with and for Society | Develop the accessibility and the use of the results of publicly-funded research"} +{"code":"H2020-EU.3.4.4.","title":"Socio-economic and behavioural research and forward looking activities for policy making","shortTitle":"Socio-economic and behavioural research","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Socio-economic and behavioural research and forward looking activities for policy making","classification_short":"Societal Challenges | Transport | Socio-economic and behavioural research"} +{"code":"H2020-EU.3.3.2.","title":"Low-cost, low-carbon energy supply","shortTitle":"Low-cost, low-carbon energy supply","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply"} +{"code":"H2020-EU.3.4.2.2.","title":"Substantial improvements in the mobility of people and freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Substantial improvements in the mobility of people and freight","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Substantial improvements in the mobility of people and freight"} +{"code":"H2020-EU.3.5.6.","title":"Cultural heritage","shortTitle":"Cultural heritage","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage","classification_short":"Societal Challenges | Climate and environment | Cultural heritage"} +{"code":"H2020-EU.3.5.3.","title":"Ensuring the sustainable supply of non-energy and non-agricultural raw materials","shortTitle":"Supply of non-energy and non-agricultural raw materials","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials"} +{"code":"H2020-EU.2.1.5.2.","title":"Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","shortTitle":"Technologies enabling energy-efficient systems and buildings","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and buildings"} +{"code":"H2020-EU.1.4.1.2.","title":"Integrating and opening existing national and regional research infrastructures of European interest","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest"} +{"code":"H2020-EU.3.7.8.","title":"Support the Union's external security policies including through conflict prevention and peace-building","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Support the Union's external security policies including through conflict prevention and peace-building","classification_short":"Societal Challenges | Secure societies | Support the Union's external security policies including through conflict prevention and peace-building"} +{"code":"H2020-EU.2.1.1.1.","title":"A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems"} +{"code":"H2020-EU.1.1.","title":"EXCELLENT SCIENCE - European Research Council (ERC)","shortTitle":"European Research Council (ERC)","language":"en","classification":"Excellent science | European Research Council (ERC)","classification_short":"Excellent Science | European Research Council (ERC)"} +{"code":"H2020-EU.3.4.5.6.","title":"ITD Systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Systems","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Systems"} +{"code":"H2020-EU.6.","title":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","shortTitle":"Joint Research Centre (JRC) non-nuclear direct actions","language":"en","classification":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","classification_short":"Joint Research Centre (JRC) non-nuclear direct actions"} +{"code":"H2020-EU.3.2.5.1.","title":"Climate change impact on marine ecosystems and maritime economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy"} +{"code":"H2020-Euratom-1.2.","title":"Contribute to the development of solutions for the management of ultimate nuclear waste","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste","classification_short":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste"} +{"code":"H2020-EU.3.1.7.11.","title":"Rare/Orphan Diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases"} +{"code":"H2020-EU.3.1.4.1.","title":"Active ageing, independent and assisted living","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Active ageing, independent and assisted living","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Active ageing, independent and assisted living"} +{"code":"H2020-Euratom-1.4.","title":"Foster radiation protection","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Foster radiation protection","classification_short":"Euratom | Indirect actions | Foster radiation protection"} +{"code":"H2020-EU.2.2.2.","title":"The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","shortTitle":"Equity facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Equity facility"} +{"code":"H2020-EU.3.3.8.","title":"FCH2 (energy objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives)","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives)"} +{"code":"H2020-EU.3.2.3.","title":"Unlocking the potential of aquatic living resources","shortTitle":"Potential of aquatic living resources","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources"} +{"code":"H2020-EU.3.5.2.3.","title":"Provide knowledge and tools for effective decision making and public engagement","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Provide knowledge and tools for effective decision making and public engagement","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Provide knowledge and tools for effective decision making and public engagement"} +{"code":"H2020-EU.3.3.6.","title":"Robust decision making and public engagement","shortTitle":"Robust decision making and public engagement","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Robust decision making and public engagement","classification_short":"Societal Challenges | Energy | Robust decision making and public engagement"} +{"code":"H2020-EU.3.1.7.2.","title":"Osteoarthritis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis"} +{"code":"H2020-EU.2.1.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Information and Communication Technologies (ICT)","shortTitle":"Information and Communication Technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT)","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies"} +{"code":"H2020-EU.2.1.6.2.","title":"Enabling advances in space technology","shortTitle":"Enabling advances in space technology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling advances in space technology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling advances in space technology"} +{"code":"H2020-EU.2.1.1.4.","title":"Content technologies and information management: ICT for digital content, cultural and creative industries","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Content technologies and information management: ICT for digital content, cultural and creative industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Content technologies and information management: ICT for digital content, cultural and creative industries"} +{"code":"H2020-EU.2.1.5.4.","title":"New sustainable business models","shortTitle":"New sustainable business models","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | New sustainable business models"} +{"code":"H2020-EU.2.1.4.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Biotechnology","shortTitle":"Biotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json index 058ce8877..ca2c0f165 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json @@ -1,17 +1,17 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"230066","id":"883730","acronym":"SOLSPACE","status":"SIGNED","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG","frameworkProgramme":"H2020","title":"Enhancing Global Clean Energy Services Using Orbiting Solar Reflectors", "startDate":"2021-03-01","endDate":"2025-11-30","projectUrl":"","objective":"fake", "totalCost":"2496392","ecMaxContribution":"2496392","call":"ERC-2019-ADG","fundingScheme":"ERC-ADG","coordinator":"UNIVERSITY OF GLASGOW","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4"} +{"id":"886776","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3"} +{"id":"887259","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"883730","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json index edf83fbc8..ae1b7cb82 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json @@ -1,16 +1,16 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229236","id":"886776","acronym":"BIOBESTicide","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"229297","id":"954782","acronym":"MiniLLock","status":"SIGNED","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020","frameworkProgramme":"H2020","title":"Mini Launch Lock devices for small satellites","startDate":"2020-05-01","endDate":"2022-04-30","projectUrl":"","objective":"Space industry is experiencing the most important paradigm shift in its history with the rise of small satellites and megaconstellations.\nSatellite miniaturization requires to reduce significantly production and orbit launching costs. To address the\nnew challenge of this manufacturing process and switch from craftsmanship to industrialization, space industry is turning\ntowards other domains looking for new solutions, disruptive technologies, and manufacturing process.\nMini Launch Lock devices for small satellites (MiniLLock) proposes innovative actuators on the cutting edge of customer\ndemand. They offer plug and play solutions that can directly be integrated into industry for satellites robotized production.\nMiniLLock is smaller, lighter, safer, with a longer lifetime and generates significantly less shocks and vibrations than\nstandard actuators such as electromagnet and pyrotechnics. MiniLLock offers performances which have never been reached\nwith any other materials.\nNimesis is the only company that can provide such cost-effective actuators suitable to small satellite with high performances\nand reliability, enabling features previously impossible.\nMiniLLock will accelerate and leverage the commercialization of Nimesis technology and ensure Europe worldwide\nleadership\nand independence in the new space emergent environment.\nNimesis ambitions to become the global leader of this domain with a turnover of € 26 million and a market share of 28% in\n2027.","totalCost":"2413543,75","ecMaxContribution":"1689480,63","call":"H2020-EIC-SMEInst-2018-2020-3","fundingScheme":"SME-2b","coordinator":"NIMESIS TECHNOLOGY SARL","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229299","id":"101003374","acronym":"NOPHOS","status":"SIGNED","programme":"H2020-EU.4.","topics":"WF-02-2019","frameworkProgramme":"H2020","title":"Unravelling protein phosphorylation mechanisms and phosphoproteome changes under nitrosative stress conditions in E.coli","startDate":"2020-07-01","endDate":"2022-06-30","projectUrl":"","objective":"Currently, we face a global antibiotic resistance crisis aggravated by the slow development of more effective and anti-resistance promoting therapeutical solutions. Protein phosphorylation (PP) has recently emerged as one of the major post-translational modification in bacteria, involved in the regulation of multiple physiological processes. In this MSCA individual fellowship application we aim to bridge the current gap in the field for prokaryotes by unravelling the unknown regulatory role of PP on proteins involved in nitrosative stress (NS) detoxification in the model bacterium E.coli. We propose to examine for the first time both global protein modifications (e.g. phosphoproteomics) under nitrogen species stress, as well as characterize PP in individual proteins involved in NS response. We will construct a network model that reflect the phosphoproteomic changes upon NS in E.coli, that may pave the way for the design of new bacterial targets. Understanding how bacteria respond to the chemical weapons of the human innate system is fundamental to develop efficient therapies. We will pioneer research on the mechanism and the regulation of nitric oxide detoxification proteins already identified as phosphorylated, by analyzing how this modification influences their stability and activity in vitro and in vivo. This project opens up new research paths on bacterial detoxification systems and signalling in general, addressing for the first time the role of PP in these processes. The proposal brings together transversal and scientific skills that will enable the researcher to lead the development of this emerging field and position herself as an expert in the area, and aims at establishing the importance of PP in NO microbial response, a novelty in this field.","totalCost":"147815,04","ecMaxContribution":"147815,04","call":"H2020-WF-02-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSIDADE NOVA DE LISBOA","coordinatorCountry":"PT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"954782","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020"} +{"id":"101003374","programme":"H2020-EU.4.","topics":"WF-02-2019"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz index 71f132ad16d88afcbba31ef4c7c6206c8e698c1d..5672d6044f2b502ef499f0c272222f5b9435fc19 100644 GIT binary patch literal 29456 zcmV(zK<2+6iwFpTP!(YS18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUD@^OOGST zl_t8szrs+9nYi2)YN5P>78iBVi&3q_j6b4Za?d$t?r!Ez#?zcNXs9cL52qb_96OKieCNM> z6sS;q^uPjRrj|0I*DxsHn zc!ht@M4T}-Vu?uSDo*7v68LYylVHMl95NZl>WXJl#ca4_Jc?MF@u`@oC=^NBVT*}` zPs)^~0=}NfE5S0wQWZ#%Em{r)-q)nOLY~O5@uy z01z&ns3bc#62jjRkHb4z4W4DEUB z#evEb_zf0-cq@<+1r+fZUy?CPK~kL9Y?7#aJV60wJTgN!e-xh)VXnu}apnQnBb1|2 zp5ipaYM9UO4S6a;#sipE_-z5x9m70DOZ#^NSV5RDdSEdI1y4*EpTiH#6UpHxf(egf z5v4nuL$Pfy@*oiKq@j!?Y!E$a!SYm~;P_7vCXccd|D5N;NCwezZG3{QdeohKsTIsTF2(ri0G)k4x)iZdC=IlL8*>60)toEi9Q4!Z*0 zg(r*nN+v4CW6Glrn+P6d`nf3|;R%zoIXrfE>pHY&vtN9e6Cf=R3*HHJrm{5%n$L>bSrghorcbE$HgraXji zrdm^g9!WEp;WP*BD`SpoT7vZSHB`+aTL^(lVWLz7U)nf%9r!DRCnWmZ1u#8PsjBVi z-`$J__KfvGqH%)isu+}|=W1t3rfH4}ajrl^NTdI(0|wtz32sW5QCU8>P~lJX?1?nf zU%cW`Ug%K#e2@rGy|l~Bp2jJS1l^e(8XoF+P-XLsTdJ&FAN`WE9o_M(JPXuJ5A=#B zQsr<9N)d&m_>3S+xER%}_&Mv_Fi4I=gdfC5gFca}+*^7maNS!k?QB6O&VtTBOC@~rdIw&z{N}MR`5Tw>AphK79=>gRwP}j^v zD0z9^=!<1Xlk(iX3T-EA|rUjA}c9ZU8e( zv4{s#*bt+UB>Ca(D-~U76Avc0m&27Mz~<6l-Sm4qnvhRnpFIEM*@j6e_c*M*bF#HU zDDRA2jr6%|QxCw>ipQ{lVq86KM$aT@P0-&oW*jO^a#Boez~Oc_ngBi%r_CjXcaDn} z_u)o^&CE^}tk3Rbbnf)?1*`#zx9tk959=;pXlLBcb$c%O%#5OWVwyHYq!tVo3JrAp z*4aTHK0MwiU-jTSKdyRIJCZvdN*DGE9F-+24$&d(IaHZ-Nw$29eSA7N{7GwLb*}gx z+>fAo(+SSf-+uMG-dUIJPq<9dzy0dl`grlpfZlBFy0Q=Ve!q9T-(zq4=dW1z_<$Xr zoUy?Pyn6m+ePjc3gDs@7#!*oZw9(SqMA{Bj|M2kS_@H;zV{e}H_V;^l-t>;&by<&f zk51q8yZAb6i|*NZZ@+)qeb+xZ?loipuZ4%W4#jyfR@Op>TES@>o?I02zAdq2)`nQ8 zXD6rKm%TF->L0aGupqs2w*Kv#?%B)!Ss_&)r8)c@sW(2LF3`!Y=mT^#5*C8X)RXZ_L@V&Q*jFEs zalHOcrf6NWLolg>3Ao`YSaR&+-r4?7zWS(fI~DfySz#6IgXa!cs{Vp2W+>Vb)6)HP zf1URa`}77E2g0jP%9v<0SK@A?u4(sf=>5{^6o$ zR!hKOXsy=o!G{e6!_^YFAK`x<*2~-NwneIQR#;Gb?Dl~@Tn=w)uqFo$zF#WA^KxA- zm5~R}5bMWR0*wDL7|-kPV_cX1Bjxd>0kvCgN%S(w<1m6>iP?y>9{gkdFGGQmzA;tdLKP_Ri#q6z2GSGm0lreaby#z6$$h;8dDA4-9Pt{(3Yi_2?#tDXE zM4)0dTUtK+vdmJzdwWiHrwx7Cn3V2VT&ajBbiT+Zmn(RVU{@kZ6^Q#pk6Cgi;4f~5 zz5WA!Ae89kv6)z{v_i!XYZ>SvzNf*Pj%`fC#Y|oQ5Xl8tQW6ZVDFvlhJh+r>tkSs( zc`}tuGWh59uSKvD*S|@#1!r?LPgY`u-@rjH;2XCXVjk_y`u^q#CQ*`VCCS{xg@lz8 zZ2exZB*>T13pn-b_u2YA`|F?JQ<+Hr`WINC_5TEe1r|7Zg5TKNo^gEaa(Yw2jj8Bi zPM`=%S#y!W3MH6!eaOhY%I-OOXnT#yq!YQd6SQ`ofR^TTiuJJ9f97mio$8Q>VEf^@ z7U@?XG5YS+`h99{-|}qJ!@N(3FTLN4Vu01Oe{#%v?6gPryN+V4fA#P%hBrm@*)}7c z9re%I0gU$oLoe%pIb#Rl55MdUz_`$#c=+hO-6qcH(J<|@ckJTeT_4QV-Vqy|p1^;> zalig=1902Hw?DwycVB(D7~V9gBgUIgVmL? zm=h!bGrI5u4ukHX1|CWkO1#e{s_G?u2XtyG;XyOr2-WM66aWIe3N8FZYt!**&9aK0 zP*M|w%U}SJ$g%|g)P6Xp)LRtEF-}hNPQln&gem43Y`n^kg2x2-7>t6A6B)KD72ae!58Wor|(YOn!13(fa$b z3=|Uu&LC8c6jJf+7Wd#!9Vcg)szjg2Ii`^DP$mJ4%vd&-S)(TztYX}wd!PsBk;-^= zn&C`=Of$}U^f@q~`qcy!u0A(AwJ!J(coArO&k-|7sX4u(t=mK!P9+}55T*ibeD^@E z{}&xsXJtJdOpo$BbFYb77=nymRFnvH;E9aU&V_%h|D4GguS>I^s5IRR@{CXj^m%>l z;XmOE!baM)wLH&U{{{91y4j~L7aQD?FUKOYl zRXUSlo+PlhBE+ku`Ap%sMIkPWwnzk0lYxo%BYcIrVE=914;kpl2%QOSk#KcHU zv*iMvHa$&^@EQr$l5PBqwx&Dw6s$lnsaO&5kStloI^Ntc$SQQtjFjJAm>^zhtIG2@ z@TZ_6oXvMG0)l^m)=trq<)HghHd_Dt5RX_&`hApx<<-2gN}!_9D5OIkWGcbGjAeC* z-xDpWJ4ZjfOV-~naPqHzP%+#eY^vfZ{C*rPFX8VLb_@c?RTQf|3|alKT)=)}={#92 zFZtz%Z-ONo7Mo4Xa(KcjOwF>Ylu~)q_@GcKe&cLw*f3kOs7EltezndhOF5IJV`5Q45aXOP3 zsO%I2>d8dN48i-sWCHsEv^aYM^5{=vvN2nf0rx`kPIVdMr(n_adBhB5m7t{eUPx7a zzqLvB7?OW_|4Du+QV>m z-VtAc<(R1=_Fcrso8!K9w|djY&zg3?k+@Wej`M45E#7FMa2#3^AZ4dt9yKs;y?Dao zpvaS0fO;FKJ2>jKB~jBoSicW*sgsdts4Lh?G@$Uw`n@?+ut}mPVm40%?yMq!f>D?} zlaWk$oKfn^d>q7|fi1KCV+60Dv}xa#RB(T(xk!Nmz1&k0=T_kPy6x=z-U^efKVVyvGfG}4wLYE0OG3Mn;>qC|Snz}^r zOQtfi{6SMBQ2Ac)n7xfwjMoXcvwy3z=Zc}D5v%Lp2Vk**A-1x;8bR)h4DfT3MdJGR z`i~A9@(>O*XS4P9$*RK^>+eH${d@Rs7=8FA)UYSeiYt`5Oy0L=ZF_dLT|aLD4%jqZ z5h`I7qCbbT#g49jcgjF8wpfj$58r|XSUKaKlzf|Fo$(RO^~#`5%sAG6z)f#k9B6_F zc6!wJB$BtyzCkB7z(ufN$9c9YjL5el$`YQ>&{S#|<0V{y!p$<{Ma~AaSRhcL@s`ZN zO^YM2PTSPNht3MUhkMwX?GO~rsW+>_+r6nHKc8-m{5YPxFy&z&3YN;{+K52FF8dhi?@qg7QP&jv#(V!MU!+Gnu^}b$ANhqm$!G7YiLBAtSAeWuc{oDWKWdU zY^KVl21ip1k{V|L0xymYtrx>m7Z0{ku1KIlYxC{P0bHn6A|_fMs5=6`vx$URcvB zaV8VBTBu8X)8Gp$FerV6m?^z}1} zqW_8rg8tUO)Yje8n1zA1ukhy4S#xQ`)`nRO)>4jct<-lBjnlZlOr<1-{?p|1@r#^A zaKg}fn}I6~MrXs6e&lp`g*`gcajh|JnA%kgD7;6KXNZXXlXCq9v#-R>B zPqn6yA|Zsh(IJ9e5O#e1!;o}l8IiuCbZeH*semCy;Ca7Vw=^|B=XvEl8i`M98NeN0 zrAHjoCwN7{Gx!1wDsXc)K+83O6Xxvc^HF>5%+p55@Thx%@$LTc!NuVGtdH0`n1$0s z&Uj8%GmIKj+o76qNsdkGT5Mnh#_RWq{3X(Eh^(^GB18=B;UwVOJpY<;lym*Q0rt9W z0=$~n&IBB;|K-(L@A#m{&Q4Hr1R0i7p-apr2_2m?g%`t6z@k{Ij5kjJ4Ta?GrR zYmAS85sgI}OcKl@!3@3?$u!N>Xmsz5;I)*(wk!_&2MvTV485!E1-xMq^neAP9SaRu zSoAAy+}%W`DnVd}5dckhIctcyZz@HJ24owm&ntSt1=!2jiKeEeR!f%*rfiTUYIY@( zAuMeyvK4DA)58!%ZDrlIrHOfzhaGV8V7-({w5|Vrg!nehu4!ibzom~F3FFlo$K2Q_sCADuT~7I>x`@X%olbks zWTT>b2fD_-nRQC~wI=47>en*+;~)25pZ|n4cDMBmI-&axwa@w1iSv48?{M1c^1l&i z3a1k}*TD!A-s09nz@l-kz>nYb`sZzx2|ue{*kwYxx}$C3nmOryHR?*X)-G84TG@Z~ zws&@Vas2w6ef3d&osb`@7@BD2Chst6uZd-6&;3FqNLnB$kB%0P$%^BxknlX1q_nBv zV?%+SwfvrT6C;d;EibVevR2xOQidy# zeWjC7Q%V!5g}|Vhy8a=Bg}na#hkr78IjqBPZo*Nd3EH-a=5aAUzr$jhie4x&7jSDesp=;ntC*M7+o6zXeyXT!Cpza%=$VlLA(CAZCl*Mb~pE)F&= zV#A)+Fu#aRz~;_yNIO}lJ3{L8vf))qCF+Qx!@Az}cStl!_cP{~z9^|3>r0RqOE86r z*B6Lg#NNEXAG4BPJuGLoPB~m=Gs1%#SY|WUddot3jiow+d#%&RaGTw8NM7fwQHr*H zPg>fIyLK?*kl^bAtIkrLUNUuO6m;m|S9o44J3~VW@i$b)6sRg(Y@=+ppla?>jpi|q zTdNb%d=H6`BRT{}0$EQ-4t%u4nU<*1#(F-mUc__O*15?p5M>Oj@q;#p5NuBX^%xI) zMrPu+Qucde^xKr9J6B5$XtObFayHMS6iIH}`mINc;`P?K{r>F+P8FJo2GuJ>O->qX z_3MS9%Ob3mQdp?znBOTb)Ly0CTd)fkJX~=EtYD4VG8R*-P{axZ>uMHBcGgrSRPI7B zmjXqaqmf)y*Hr~DER0Nr-ExE3d|x1`$E)3C5?lZFuuprStQ)s0CsW(60d1A;@wg==}}v{Yk6~x(q@ZU1rykEoGnVMk}NAtiqGk#gEub4B~!!Yg5w(B&nIZh z3);MDg95D%PSE-<3B}~C%e8QU%G_hhyr_x%tqSFB5*yL3>Ndl(y5`~0akV_&R)NMQc%HwPZA zrJ6H%fG%#e(1`t6zb9WuVO(&ojZeJm4Pr2&8S#HPOa2 z?ei~K{~R`UWN3S-QmbO#Pq>U0981ZPD;eyJVY;S6H>)7)PFQvrQWokWxRrK)Fh97| z*ejzf@1jLLDwNvkdc%p@9V`|EB&QX2uDX%Z4Jkq!#E#IkCwxB7mxyjpb2Qw`Vhh3k z`lt18%_lciTWpAD-uIGhi(lYwn#V4#Njgo2w#0;|arKS4`wIaAP8zyp%i?=fDlR|j zmZTsIgGM3k^^zUKsU|8v{*L2sFVXjN5ylG1q^!Dum|HO~2DTgq!7#RvC{rY~#2RK# zDL=0Vvqpk|8Xi2gyd?1WWiBk%$voCCWcD(763c6uNUeE*A>r5MA;l=D*cAb+nnXa{ z3}|g+bnZzZ&>lC!JFf65)|mJOL#N@xHysuu6cGhkUdA83F|NXgZ_UQU4f)}l#|q_j zD>_2GCQR~p{R1-Tk42e3PgOD|e;zC-kQcmJM3`fWq-SUnHYDHfDV5h0(5mQcrix&tglLJX@;!j8x9zu zM?w|)U>_QW7;kcTtuLu@R~*zKV{?`3@NEv)4@O{f&0W#37#>7j;W3%r2=NW9iZ`di zSjQ_l*cv=4>X08D;qzMI=5DT%joNGxP+;W_S^oh~EKD_p#4SD|Uz^QqjQkEJE4cEo zOI9mhF=hx;Kc_T9hA@N~$|cN@5g4|Ka}Yb$Ii$jIq!J0x)Y6TZ5dCIVKGvtqTj`$1 zz47!8K_rfmy$;x#SH|mR;rcysH_Ru6p@}izz4iZ-HNnuEP-PxBfvxfpAoJyU?;HcL zZQbNYDI@s1SUaeRBsDSIoZ?yO3L zrLMEr&@@Bfe%@~q(mxc=<`D;2-=2n}p_-j)HXek7YLBbwxI!!GIK@I_*-$J*GE7J7 z?Nya zg(ZyPFsCqkDk+MCv+C&Ut~khmP63o+#bpD}+B95mMzeZ7^6oxvw{W2082UkFwrd;0 z#VAZEncXY+>-Bq41U_eYv9I4h+BEcddw%Y2=2|L&em>b0M(mZM`skwRkUyx%+iUjb4p!@%Clz)IA#aKxZ`duFU~dKH4ao=O zM#vKuWQ-zgM19pz8;=1=c{DvgzuokFQ)D1Xp_qvfnz^_%M)!0HXtixsuDe3uY0Ok! zA^~;1M@CCcl*Ue~%;zk5E$G|?C9|`64bSXm*p|n!xvjc3=Dgk^J)=aD6mW>*Wkvc% znRp2|#1I@wxc=_qHh2uUJiU|e$YF~ium$&8!R=9uA6>v9bubopb9%hq$1ZEl4(l3g zHpl%;60ZgFw?QoLc`m#b|LlHoS~8kEu9$t`oazu8n4#;EUx9O!Bfc@D`U(+0fWNL? zR>)t$xCbNf;7I#Z{~+yg_$XP?hQQ5a_N{4f#wKy+GpSOm^4; zbzBEm3BG*&Lky}c)Hu5j-ym1`RB0*z_+ouJ?t#Dd9ISH|+o7&Tdj{oHDqRJu58sAl z%h;)0TX$o+m$G9jn1Hs9_0A}?H#dJ|i}m+Ou-ZsQzFSVEHmA{wDs?IZe+;RHsdJ@}k|>yM3yENltkBmf1d2bY&tZ$tC-?a-%{=o2iWBgdr(daQM%&U{|ywx~;MHp6mr!MymgkkOVvC>>NDIt4Zvi29E z;#V(%N3h7SxlO;+vr->**jtW{9bH)^U;8hlh(_=*Qc~DD)iB^ZV!gd;B4@pKKsn&h z#WiN(hShJ2%w%&6;$Ve5JY<$Fiqcp#Ri;#Pl6P!UxQ^Agq#|LhtQv{IT(xMnt-n(N zi${&UHSgJUC$*_@{rhNB2J{X&Ca?CTHSW8CXkX*R7ROm1?v8a*(F|H45e;qEGd0;v zuXCs6X_vPFT}H<%FV*XVGJhVu#-H9P7*+(FQ$kc=WdYUFkA)u%Yf1V$1gD55)AMe+t`?R z@k0Lr3M1KGtnV}C+}0`G97X8r>&>9KbVVSX%UZRrVYa7_78pqi3Rtzfgg(+Vi&z{8 z<`ez%K?isjT)ZwXdc}l=9DfPd>;9r84SWi`sN7!~HYAQEIS{X_5}HLHA8OM~4n zBQuJ$!xj^?_GKI-f~O`Ii2cXqNKhmTo&pmtq7**~?s-7m$B08PEm{K=U*U?YxE4db z+3oaJZuGRCe@0T`!-$XZnrIB7{$^WBFE*t2^dLx%y(VZs*` z@!eM+QM3;h2!m<#2;GMpb}gGn94Rbk3_ei)dz1)KZgt&7p!iNAf@MJAH0&%^fRzz9 za;0QgB!bNV6C~IbJb@*KBZvBBcLiim-D}v&upfyKjxK^_`!URye(@RmxPR1t*1$Zq zxZ~_g;bido^pVI0xQ*r)mK!h?k}l#bF=Aj%*BEM+Ru^ zdNYjgz|uJJgyLIm)j-$$$bR?W6zt5gKIC1Q1j&ZJT&-<~PwS+$2V#aSZV6t3h%5ms z3oZbb21wwFdw7J(jKD+$V#rCeO_pJzu=WCUnrW<2RcUGe6r4b@AiPykY+;U(#Lgtz zd)2RNCEL-E19NutDU(@BXNbe$a~p-2uVyNNDI&@MJg>wfP?Z^J-3}^Yl&IO(0sS~z zz4wp%)RcU`ryH!F_YeF3aM9B+MJw7x+n3Cl^?TgEnqJb|R`+Jw)Wja`$@N7nvY&g*dfA0}z2d!T*2qR2(T$u@?)wc0bq+=x*m&SQ&MMbQCTf7dwZM}z%t z@pgOBjOX>dcK`gq&^qt#CaM{}y*SO2;-X9~c&0AOLKPCLc+Z?w+z0rl)snJyy*KUE zhd!n)@tZN^ZNa>bGa)k`!)T}h2};Aws0+IU3{Re!m|_t>){=@qrJEP|9-%~!Q}C=o zZ{x_h9hYl-gyjxZ^|QJi3{$4b05;jd_M2_ZT`dr6BbtjO_KxOf6H@g~Xw%05R6UNr z;fC+TyZur9X|J>A5^7Y59}BkbK-KSx6{>fOY3%5maz8!=eAc3X8^CP$0K0j;k{qL8 z6O!#-P&c0=v(F|)i(&HjUv<%Be%Twq}F~iF62-iagQ}l(?6o^goVZ>dsaANTj04%maM+Y=XF*5LSc~B zDzF%eY$1>kh)`^WeXsx0)KabAmSu!j?1BxLj|ZFTlfrb0hRE3?Jh~YcDYyiZFzK zXvd`tbQVE{3QMr%(K`#PJ#To{Mb5}*d(aZGu2GOFy%=+o4%Y&iv{_Z8A)Stiga8_G%TD%%f5%m)#L138|sPXpbsaqx3nI`z@CqLT|z@w9K5yKOnq_7G|Kf|N) zPO03tX08s*!4bTfA=t*C7_^!bV2bYza~a{scX%t+wFZ{GB!U(KRUGY=N(Xc_1S6db z12!c|?YBF>7%LaKWiEzQZROK@LuRj;>lE$deD(3KK6>9DUEodC_eS9u%`bzW7< zx~uS}+aKF!cynW8TQ`b!OY2*Yf^G~NSYZ38l<-N@f4!ZI%b${|shHzD$utNs&? z1-0su%>-6lqbovmj6*SXIwKfNhaTs|=pB-6ha3)U<-9CfX56G1{FI_Icf7JjV-DpkfC>2zU+`>P(>xpf9I7V5a10V%X|ohq2tx zHY`yw$2{b7`UA!vIG0u9VBYy_ZFxbrTLn}()PWIUP{G)+CY~Y+BJNt8sY%c6Rn6Qw z=0lTePtAnLQXo(_#?;6o;LIaoRU1!TgXmHHw>teNUV}I zczGJDMI^$eJ|xrWwe{`}q3GutiM~%|+;@dvKOVI0H|FOAmp8IW*39qQ7$-g7_Rs6L zy}sYgzSw@dTY#i0IAw(}(m5v&oMMs55DyLr5tjiGk;?Jzf&>lR#(4sgQIvCxrVTxY znqBCK41$E5+*lE3&L%vgh-oYk-N=(A72{zX64e15D#CkNY?X5`li<5B9SUz6%qZCL zT5FwA4{9~jp(088BAn+0Y{aQP)9_n72viPN{Y;wTmX1^Dq9?bYEf2PsIW5{R&EmK_ zwarN8AO)%_4Kbl`qP6+*<^}k;mK>fVj*W;Y+Ymvt0=M|?4sZ1Bv^TdA*toH!2NBk}l9Jfva>^o=KCHtJU@a z7CqO!_?g;cA#1&PK%+8c!^L)kKHbeq@0FDjlJP4ZuXwWo=WRI_Z!5m>s=+mR zBf!Wg`3pAc`7QdjJx%t|azU@xPh&(lTc}^B_`?Z%Arc{*NQN7R=V`+13#nA9djSt& zciOhv9WNxWN2{L7+ksMf1OJU5RT1W~hQA(MMtadpBH)erDv4sWUe2a0AG}21R%yj}mb1VX+0ZBPHmd-Y9ZIo8KEQ zVXs!aop{>UiKlnniUVoC>#9Rxc!H#%LV|PKJM@xxYD=oO{lTDjtodgLy?=bCMMTQk z^$+ogD5KGIh(*pYnUjeLnHi4p;4QoMzn_!i>5OdhA~^eH4sL{EC*Vb3U`yP`k!T^_ zW(LN=lTHFv#@p^SEU&RqFDuw)^OL)EDXF1t?%CMHb3IoLt9C${R_Cthl;kXSR*F)I z;+MIcYtD)?gi^p!@(~XVJQH(2GZ`dmPnWoZ>!54F6&NR}VIIvmQMV$Wj*KPJM43cu zhZ$a|LNJ4oAB{vpB@p^U69EH2RzIQg)E?%JqL@1VYF~O2Im(`$aT&3nH(D;XIEY$G zwNXV^5x`P<8eF3}tVOzkN2`{ZdBl_21M^$5zR<;<1+KTlu+<|bdQD#o*raG>V%>=; zd)iiQd{jPS^M~6K$5AFPBQ4MoEfg+Qqin&osquI4H*5gVG?7I%Rg|3m z9o3xg%_qxLBH?by<;lY^q1HKLSam)t_E9VXt7eWYQmSAnHp?iFiOvTfMf(P{lEf;k zD^LREr|i8%14d$O{I#-DRAUohD`(5OsL?c=_i-p9EMX@*%+%ltI{Wk}k21Lz$~2HL z&=^53RRNM-5dvwHvug--VEDsjQA=3X+)Px2#j}A~WrM6O6#Fwv>isvp?(yLDm(O-M?G7mD zaR1da_Hm_{)$me-BGO$^TcZPCqn*ErLS_?%jzMLWG>LdTtkX^;0M#vIk@D z?W6XoOi^TCuz|TtK#{}N*8l3e@ZCwN6J5X}4FOGtEcaT^h1oCMu)9 zzF@D#vI@&N+Xe0{x`N;?yTL=EdIeQIu>*(iO5ai7aVZQO#92Q3F}g130#93c5@tNe zvw|BHgW^shr=`uE5#H;Y?X9tM0q$eK16`f^8MkBf6)V*jtiOLw*Ig*bk~o+213DET z&Vlp(2#jN&*9!s8id$S2h~g)(F%@+KA+1f!A02#~DxyJ25~0K>%E=z5-AQ(lVY4g0 zV>nNY*~;0KMEVM(vsZbRVx!>lC$XD|uC&?ROgyhOdFle#O9o=oAW1nYwui4Q+5gL29qu)e$ zqZ>!S*W5)|$daIFr$gLG0aEUWP6gW2Mp>He*_K(Vuq423R5k&#<)}j*X;d;EHXQp$8x04>c&kXhj~Wby*BJ)P#!ty z2GsEGC~L(*$C=+1U;^ zd7XYCgZpuuO^i9^um;&$qFfmZ?5zc$OzT;9x2xe~K&VNIaKfLu!p}4oyMYF8&g1_q z3N@5atG~F}U$#X*;B)tqDARPkYEb3q%e5e?G@If`C-Ja0FV^Ar#&JJ;@MGE|xpjGO zN{UTb%qt+qiW>PRz`?aK8oMRsCHZ}?KX8!C@OUfs3sP_=+q5}l=1(nN1Am-$)@34Zp6x8j!P894c;h|30>qg1J&Jy(HsGe@J7l@0nWTUffgWbAJ zHE(YlpGG$zJO(DfWIAOduxd8kj$1#~9{Fk(v`VamoMLvG#-^8Lx;jtUdtN z7TaO8GMj(ew9r5lhiuk0>DbaR6^$Gdc!h?t(- zrLNQPmK~10PRc}?3Sq%Yf{EH`dh+u|yCo;fKsMZ11C@v&0f@ zSAm)kveIQdw&{8`+jusS3GS*q-faTQP1+E@okKlQvJJep5ja{lIP#4yCH^tr! zYb50focTzZq9!w{_4g%Dy?)&cu&9W6yj0$B=vperZSBR`LI%?)*NmO)7}PEPd&=j0 zMSYV~@FC<_Bxp`(2!@=BxWkU%YPdqQ0~Tx2cVCJGKHuzJXj**iiK+<*JqC-)5IMa=OwC^?sme(P6G3241uukHYzt#Aiv_VH zNWfQgu}E3})#ikFE&HlX$eCI#*56+;Shx<$| zbb+#hY0(q4ga_ow>ccl7s44uUnn|9hMXMXdTQRC`AN^_VuEB_QMI~r3E(rW3P-S`T zY$C`v&4)xhHpz&c-94XeNyr+)wR)!oLGHJK6KfjMy#n%6b>k}eND1*L>4t@`ZRw(I zMbJxLQ`7e0M**LG)?h8l6H4&IplS@p*vg>Z=36J*HNe(7NcAF-=DXqXE{t5m}$zK?kztLt{ ze?8$5Y3+RJ6<( z^IzuNpjUSq*L6&4YA(GEIc=r83;N|@jqq((CeWv~^ISp!(eB>>jJ6wM6l9|H;Xd>s z?aBCjyNukn>^r6TUgKhn)M5|Jo0Rm=w(`#UgYWsQ_Nr0bQvLh3-`?RN`n(?KuY4{T zSRzJnly(^1)SJhNaGH*E4pznZe_Ojp-SIo^N$_m*_OD~|G|@{@NU?o)-}=4wB>B8% zd?chVJLtZnM=C0sg31ixW#Uh_{#zDB$PEb}|Gf4jucbL`;Ae&3u6pt|SMLtrXm9jx z&pD^GNMGs(`&13=ra1aT9?%|F?$eF;z8T#fh0}&E+pA8p8B~#F41@^=V;a)l@}wJ{ z)1J^S*8^M{mG#2h`-4Y!Ou|0+DeXz=VlUmE@U~%EFeBW!Yaj6awvZJ+trZGMJ`~wb zp(a0dRrzVU3?`DpLI(vrW4EJu>TKv=Xza~yNAUNKjzn}=Llv|-0H^AtJH}Y-9y|p@ zyBHz=ahAmyQ;c?0kf@>5Gz1ba{=ZGFbMCC*V_p zTG{P5oKk~6GF^&2j8*)ITgOq}Czu4;>F+y!^)Z%DgrbP%VJ7Xzm=nj9ITPtzCe+*L z#~ID1uEGP`8t%tbw@uplfIh3UtrQsPI@4CLEQ2;hS^2IjIiJ>h(OYF96lcI^S;S36 zG>_7l8e{Dcz?eejF*cyX-4F{A?y~Z?<{uDTy6TYO}LwK!eg;1T@oT zQg;}YaFk11&9H5HC(GvEkfXipm)bkJ+sgGY)x2l*w%f?1*z6fsxEfWQlFC>}ss&$a z96vTO6BNxM$`8%1gr#&&O|^_muwl{Ntm<~AxfqZ>1ok)K!g-QNSTxP{AX80Q@wE=L z4AHqXnRSDF&F2}MmYtr}Pu+`c!FXiKhqGKM3T z>Nj2?U_6R0?ltqZ5L&elO{@$orZ|kjDqFn3SiKv=VMFZrr?E@>$YI2!7ZWZs|^BWtMzcN)o%~q-; z&vvAl{^IU*&|v*%I%Nn38S!BPPtvJH1qyZCkP)T_%^oI?+r2Vtz8CBwP zU>VreVu`@ooTYPh{c91dVlh@VkbP6D-1}U422s_n88OY4i}JN1>7F|`UeFs zq_8An#i`j1sMsk2k(;=(VD;f!f*(t^6;0-Uaoe1dZi;qJLYN+$gUR}jDFJ}+1#=9@ zRHo~Os0R@plqcLAfhH6UVOSzhOg1Z}azN9ng`um**X~U7gf`k=rdV=0tU7{8#$P3c35JWz@{@rN@-c47*N{rXvQx7mW z8!N)#tKh>o3`x%~c|@8*B4t62W@lcXg(FUH3QkVf;HYQ$@a>#~n&|x)w(l6v0d`nH zl6(SAf(r#Z8kZLlSvr}QBbh>m&@O!I6vis54&xGwhP^| z&bH2ThY?x7L_k{^T!bx;Zj#}|F4pmZYv(!mR0Hd!bt4h<_!Veg)W8)ExYFmkOV<-v zji`WAP^+!=dkpwV0|#Zp_5VZ@leGE0zRQ;dXmrHUaKgP{7@Oek!LCOdL>O-@S2^Ek z@akBk1?94L-Y}ODZnZWqgW9Y=V3dtF%VFy(4vx??7hq``vbQ%yZEnnSyNefrWRWc( z<6v5Z_*|)MlUWac{T_MfXA~H)%y96C*pOc;GaowQq4;blX_{J`AD?EnOLo(0U*`c< zTQ*vh50K8NMW5Xfa97PKQQT8_3B;$`kEu#$*u1n^DBG0YtD)^8Dt6LwwwD^2ZWGjV zqo;PVA7ps5>1D;@^$!VLM<|;PK95C;C7Un_ zF+2e$t}gPa!(H)9Y-SR$NK*8@tVG?_6ugl`*r5nTG-S;y%PUP}3hPFBT|8|8k~IZ- zz>~&#dw^Kp?vaLA7Yh}|a9_)!%H|UM)SN9;7^C`M@l*}h-!DqZR`{|9i9A+(*JIh5 zl8*}e3i=a4v~)O@vVNIHNxJ_^f)BOtBEW+|;6oy&5a+kLAN3;3YLSZB}21Wc@=hMU4$=zeo(dKDt1EeFzha<@Tbw*f2P? zg5OSwilF~kpWb8suNj;|?gZL)e6E%XT|+Q5PQN_rRj~@~BoW9@r%|mih0>o|{~5;6 zFgorUv+(KqZ1rf|}miBdR?#}ploM7HWE2dB~X-0?thPe_+faw%w^+FeBh`|waL2v^@nc)q&3Ph93m+5ic|%ck(j5e zr5Wxr%aI0S$V0ve=+%#5XrF*vO5*)QlO7Rq2Z2mh8p7jLq8C2l<X-gcQv8o-%5$m#9U z*(j&Vuau;mVGRG&F=iBZ{oZtg!8~`aL8hkHpVY&x=}6*-(OM52KvlGmw9Q4kr^a|T!hiINt zY~V~`&~pe!4s6djjIi`8H1@ciMv0l@ite#9p3jhiw4vZPgzCLn%ngEp$`wd?oCvup z!WbcFoG}Umeg>hXUx03+kTrS{Gnuis60fSpvJLjT>~+B}xhpX&^%)sMN2^FAHswfs zsNfAOxd?)Z)O#2#CW&<+$^@N z6tfs^Juqvq%nv`aK#-My|KV2`_^&31%?&bnz2<7*>HvF-qkR+E1}Pz%Kx>G^f_nss zlIi-r(eo;fIqW#Y#yQbG6?B>4(_9m?IoehcIb z!ye60?oASTReQBIdcFNY@6FCk&d=&hX$~7$g5lS+RtG3b4tf{AqJ~)5r8iE!Q}C9u z&E5kZuscLcaaSNRQq8z_qD|LUr4SkU;oC44W^71RJj}yo41Tp@aO~mKM^iatD%5z( zCF+qJ%wKl>`$UCmA=7CiVCyeP*B+qqj5IrP=#>oD2wp*ZYaA=&^#?ENs88j1gq_4z ztjbMPaqI`iuG?g>wvGcGfEqCon2Et-Y5p9My^;pDT+&sstl50csUMhk z+Z7dl=Vn{yAxw^@cLRYx5;)!_SFR+@*Vc*`n2Ao>xCwk#JcM=EsC!jtq{vvdqcnp zpDX*@7!_`)A9gLwr_EqZMJzj@aR&N0;c)k>QWBao&?uN9+srwls!N$@)+( zS*;;#*->@(j4d^D^_*C7s~UN`Ud3uCn%I;%nT#cxb0@$5L-SIE}tG8*Ey_grezOcrJ;EB55$Y5i>M|qk`ePiX#x1a)M>sSpq^q)SCRRIBwDH#O4J|a-wV)j| z9)U_oVDS3zQ2t|Mfb&R|oqF3r4%d0cQHO9!SgmJA^_p+Ybp zmQZV?3<_jGz)M~Af_<-~NU&apV1doE4cnfqSai&c*GiEz_>+$lS*qFkMYj{_Uy23R zSry<{Dn?6eZm^jB0#gOaxJY(m|IBQ!KtiVklqEhfdL0HGE@YB9EKz4_V#vc>1m6^_zfH8F?5fW#BwUzQBR+UZid9?z#<2q+y+T5%rWN32COiVdKdYNS zMR#TjrrVhYJ#eclq%%O<)0pAr(mdAs%I#?XA|g#dQ8JE+gDZ^}62@(nNE#`=&P)Tw zjzzH9G2uV_Z?Fu?a##2ZpRy~yz~`R~-9t6Qo`B=A+ycFBWE&UYrQ8J0Y-yQm<33-g zL<6BQUl^l<0V@frJX|d+5Hz1vFlhLvfk7Jy8d12+&okS>WYjV&=eOPZho!XeI-Lvt%G*?`a9CcOs5U7@$>!dbdSn( zlgp8TWt>PDL`s(GMu$U`9tVED|2=5R*kG`HqfRxPgkDIr7iyJyGo98clO0 z@hm0mg#Q`I{8=D~TbxULPZU;tcI3}I+q>T%LA8_q#% zMq*=dc4t)dnw{B0N0X?%*6(i%k9jSHu8(nJC+r`iiCUxoUSGTB6pApl8&S!tBZ?Yd zIQKSQE>JLRM4wdk9%ukvNebI+j3Vu;y-`j&qs8D$O|2jDE12OxP$-N{wyYX3f}2AQ zejygea-!_p06r_wRk}vM#PsOA6?~nyB+ZsEA2*^Dl{C((%oHdQ3WZg{KG<7SKxaq# znhZ%d#Q8jy)cmwzwP!FOjpMW8#}=9ZgU3a%wN`W9DXe?yW1N<)?{r#0ZP!MVBI{7PdRE){ zs_kH8JR-}|$S{tHC7TGRAcq01kMc1(FIjfU2oR_r7{~~^8WeOVD*{}poVwnt0%W7X zpW3}EjS`bupx?ti#W+vY2C{7! zg%g2Cedg8&3y@2K{*-25GHCb$4-&;|ywGiUs%Aq3nj=`z{4^aiGk&C|1T~XBS?aMi z8B(|BS#Fai5`q5B4S1obH31)m+7nlrhkxFUJuX40CyGNCa{2J zi7nEoc7Z*%Xc05>PmE{loF;OJ7L)#EZI4VSjvPPF zXCg8|i>-VUj~u&4st0H6y$HPqjO>~1hd6|a>Y*$eCXMq*89aMJwGLI~&w~)9M--?$ zCUC-&q8T3!>j|4kgm;5m6d_+9*WhoCu_l`wvb z2z58H8F;KbvD~o`(LAYk#CH=Cs^uE2jKUo<4ef=d2AM#f&eAgi-+zizPKDZUWiq9A z%0lSZIw-Z0qU2;cUw@Ysl&1HJN_j1Uc7%GjkRQICrYlP7J9U4&l+bic&?Ur+biV%n z`qv~|fnhM+8LoeGFQL3v4A+8)K|=R3*R|b-Xbk1aOx~mnRLQ6aAFgCU0!82&_=>#3 zyvn_V@><&c+|>G zEsqco9j|!UERMFiP_}CxYzX|G!rdgaiT}h2-Qog;U_@d%0E4)hpRlh-tFP9G*J_?w zq}Ixw6hnms&b!aAZM-lr@k=-wOsZ)Xio4w(9#!q5?lF%C@Gi%%Z&4Az;zpd!1-L2e z|Ch6+QI_D%VSRntNHAN}fHKCi=k_e78X&q`Z!T(z`3PoY{DduRzW4iTTjg4x)*}I+ z=`_N#f_WqMbEK$~f`yUNP-<^dd5mw=X;ye77rc~bxj+&sJtX+o_5VuN--lp47#3B3 z5_wGW)+9R6+*5k3OfWFbho!?n@fw4auAt0j%>FiFTN>gv9wYa-zdpLSuRid~O>C>S z%}q=2D(GAEKCv)#5BD9n*IsAVxNqx4(le!kaP&^S zsT_|1{cU$28S&-#DDMJ8B=*wK3=V^iv-S5^UbU%LBgLU)ITkr~>6pTzWij%f>0Bzl zdVWKOw8fcshi9lG3nDXS_i?HZ3Ol-2m8kW;dM%n1?vOzpYKk)Kka|NO<@*fRtG)19 z(_XNs$XK2CpyafCi~t_JzQzu|WfTt{vsZ;#+t!-!*CtE@U2;>h_Fe<`qb_o+HJ|O( z8Z3k|Atchyi{XFfhNG?P?li%6*|V8o&#bPS89^HZyZDz-Ihg!Y(T<4}p4PX(i?Ykc z1;Nd-^$*#qfU96>wl2zHM|}NnF`vw~1Z4-<(E_Dxx6wvp_=AF;EHwkb;=rOPFhk7d zST+kJ?l5GpF-lj{S#5eX39}}klX|+b5NvZ%@67}D8Y!;LaZb&1tp>Q%BUGO}JW-}ws#i0)G5a5`+_KdAi&L>;<(p;-kABl%a}cr ztpB}J=Q0rlB4OuhE`!|?`P43j0BagFBvl|NF(DXgQTl9(EAU;yB9x3E^oj)cAVKYn zCNaaR{1i*jLgEjYs|EIn#y;~JKbfh5H)}D$t{?%A3ug!3gFe*EHhAXK6wR5=c=U zzJXz9P$MaF2)xFL+#4ws8cvXc)T)4mO$7KllO-k$+fp5kX&LB|;FJ_nCJ4|ehpil} z8>W^xq2h3mIQAH0!UT(Jb}?;bF1)7PH#M`O@wugAP1iq&@^n|_U@ktoC1<}H4A&Re9tv@k=WCM$Jk+og27h{c7RX5#0)4;o? zFI>gxU=Wyed7xH;0qqI&3}(yL*045B?;MH)Fo>oqp5_TE?n|`isLAINM^-iv&UK@l zMb*uspheJyewXP~ic9>;Sfz6n@?_eGo z1nYg0^?MUue1dPmqriMa39!XGZI<^UZffdB&4%ZKEZa|!s?2ex4$63GDNq8+jj)7> zn;@BuTMlsJ^^JjTej)Pp{17CfrrT6)x0ByV6ix)Uyf=(6g#-+14YKtO$;jIcvQ zmb7`q9Ra!;$%z0HWNVSpRr#ky-ST(|yz26&Hz+Ah)sZcbq9qd7zt?{}UI4FprMUze z3yLNREkW)s!A7L*eI@W3EOrW}&lHBi;W!7Hi)_ZH1qOPa55c`-A|7wSSKRZv?bzTo zLSlr$iWJx&YftVfboG=hQvaqbR=~_6Dp#fjE4d~G%3uK z^?yfr2@$s-qR-!Acn$SCqMm9t5LJ%KFbKdEgK;Jp7Y!eAM@L;jLW(^#%-k{C`$%@i5L*IP*nptSBomd&jQKn{8PEZW7w15(@>|oo@*Z+$)s7J-; z)3e+dJHLZtd7JL@>RC3mv-T{DLEL|MUhR=)04D>-NVw4o;m}D5Cv-6@iIIpPVRh+2exsu6UFqfAr2q^AyJ8?hnJ+I~GLy1vZS~ zI5(xcDK+j6StPG^+wh=~U9>~m8k%8+vtjA2m^Az~S%!%kQ-z(3qopQ9TOgX-1jJRe zELyH0+4Mq_t{EU+Eig@+tSm1K_*^1QHHE7?@EuL@i~~cUA9aw@`>+V9mxHm!SOnlH zJf6@^l&3{mbs2dt9@L1c6Q6gT+_wnz<6*FWO7nh4B@wvprm(ZctVbwa^Qxd}pxD;E zW0f9Fs@gCy?1VqMevkDK3_%wO0*l}R%v8R?E4gK)UP*_WA>YDn(xceqHQh2kkJio8 zI;jYv6t2KcYLL5Q1=#`%9x&ackPJNNBkDK** zy~LJUJRfqfY|j+jD7)?K>=yg>2E8|L2K%qNhv#ql`>%TJRqw2OF*rOqJLsKV9KY-x zvpqJ@Tp^%8c|H`0q1zjgZyjePDDkRfNv#V*A80RL=T@>D_78gOw0FkdblKo!zu!Aw z{|bcAgXG3!?0|Ivh6tFBR{xoXO4~GUul}wbAnKi;3>XZ(81_N;pcpm=eRW|P@250K4j|RQ7uRgl#@H`TrI>Wm_b|bZr#8>*aU;U3M zQphE&clzRQzxr3hN(wJm!-AI^jHlZM?M`U+y}% z);1)J4Gg-HK_SGo4%YvU^U71=!Rz(^!&1P+JmA11n6${F%ov!Re|OlA%?6b=p9vwgr~r&1Xo0V5iV!VZFoITT4I z#|2pG7qCGScx<*I$oqh)@)!etUgLCAu&(R>m=boFiUCX3#tuq>fK)+8M88PVAp|#1 z5zmp+EZVeg+DHt^swxW%H-XANsm}|hdwGalyzZ^<2Hsz2D-2`w2*ZuYH zKm1d{pNC&sE#!x9R(R#F*fd>b@Oq|M_GYGrvI+8k&@6dPT-Au`3|Xk&J9`B*-_%x` z{Ugf#lUW>fcxQnYz%oI~x(OoN&7}eUQx{P}h!qgHCer00li@Lu`@Bv?Xj6|?r)m8? z9VdNB7bi*wewTLh5_dwlF2;hHz&k0As`eqSAzq6re^xj6H&m5>_Jw`LXzW8; z17;yhHrM;2WG79yjIgB?l11+!|EvmzuuGaifC*(lM+`}^HS2Bg)< zw_9Wa*HgmB8^Ye@jmK8GMqtJCBRsM~8*`WCxGsl9Tw7KotC9OO<6#z0YNq-;$2%PH zFJ|iche$4T_tH5wjS7+Ulv^=c>>&lZ`}#LVb)zR{m9gWWrX63*!4E^-UUlk2Uk~)S zfdA=VSRtqmf1-?@G#k8nnKsYi6ldD7l_m1S=$wG*$yg)<=xWzUO#A?jP{7Pc__0ES z6^S4l=D5<~zb2B`$Aowe2R0_8h-UMO7u&8n(?!Io$`PTO5c!ogHK`*^{&mCiHG!yB zX2x@1>iO0%xXZX*ZWwILMMMCc!c`ovLvBa_s&G{yRG^hm<3{r`6<)DXwTysn09$LX z7VzYjo_U_*R&JTDro+KbRFNh{M^0y$;hPGhkPyedgog-cM4T5rrcDCf`sh*Dx?RHc z7_jye6l)6aKWgxvV_&tYS65R1DrmyO5R*3*1>S4yo&yQ)*pa2oYWsw>48dgm-6%`7 zK#!YtuQz^e=m_?N-(3HZ7&@O|Hvpmi2V&bo)hn{Cfcblg;W-3YTY=jK@`xkr(=j z1$CS?bnLCE%4c^9jOsxtjLI0OyU;~`aG0A$*j+^MsC1s)CwBQRA9L-goPqy8b$Tp3 zL=KM{$jw7;OL@Sc)_;U8pEH}OpL#u~zs#F~9?^b zOp@bz>G(tmZ*xHxPa5e~_jg;=Xiw`RABbVLN#KxP-y&|LxGrXaF=CS^Aqwq$FR`89 zZt>!A&s~(0b*t&ov-SH9{1Y3pq6L7pJi0MRht16g?RJm!=X+{vuzp_`r|3u9q3XYE zX>Kju1fjJ9*{uEp|A{BYfja&2s7F3VOa&qsF1@+e(ZIM%&N>zk5N&ezH%?@~JWZwc zSh5wLiv{QqinlPQ$;^~4F3gaNW|&4+u)8l1cU(Pu>3k73xU`e(yR=zQJiWkrho3!D*O zRN>Sdj#9G17->ZTiYCFf)YYkGtSr>hVR2J^d^$M%NlVA8){p4mWbpdrxO@FyyX-~x zU6&nqFJ7O#?V^2CrR6}S9AWs54W^(-LbXVzd~Y9Azu2H9@RMS1FpZoKvTjMs*;e4; zxhvVZjSZp1(LCZACpd#c$5P{&s>@?l!PV>I@qyI!dI$Y%btfsOl>B6#$(dZ0K3T+a zVmB$9JT?7E{S{MlZi;#HlzO|j-#F4aHb)w#h&99!#wzoe&A|GA{g%!s$(b5*S`1a_ z6!|6^+It)!&rQ9Y6xh-FJyu}j2Iyd{lLr~Xwe&it=W|z>91-mPl&|^jQRBEwa;}FmfS{5YKJDdSCQaEnK48Ty$<}7DmRf4YymTZD) zMVRhwS9Z?_aii?G_(vj{f~4t?@IgWj2~2a032S41{DWTW@3M5N2igoH`W4-)-BM#V zwwlkGVPAWs-aTpDnO%$+GI)w3Vf;rTryZ=-=bWmr8*r~ml@CsdV`%9I; z+ZndZxUKOQAQ=|m8lwWRJ%x6t*hZnY&L5>y_Toj(|4BFbZ<+qQkKsyi#)w zkFwgUp-ys?jW1l3`R?>oT_+=LvXi#Ui4!N)X9|@woI!-Hmg~x9GGk+bP+T~=J#)lv zlKrvg)bo6}d=&TtOS8U2RY}XpNYMgGVD)mZIdR5^B54=8@CD@pvx4!x4e@aEN6xEn1vt zMDSJ|@I8DSui5b0k~j{v<7MiTkIW`pYA-ymkk{gVmtRD{!CrAf@syxa8^A-!TT#TA z^hDI)K1*E*OB`GJ6$jC?HG1Xdh0As(1uR7LL|I^ymt*&#|&Z(jfh^(=! z9t1om*zyGVV0lg`whS_6*9c84MEHrsRD49)gAP}wx-l$5jm*0$+ z6%FP7%`_RKED)1--0^8aAtw_y!3USuYg>??Mil;F{hmx3s@fnZ-u5W84SGy9>Jgty zi2i&JreytxI}4811btcK)0Zj^TMWQAmc2|o6a*%4UhSj;@f?V>vY}F1AcWSi@Rn)v zz4#ty!k&e*{>fP%0SkZqQ}645s=kn{?_0EwV>%6!B>iFc%$;*C^ecbDxH_3+zPYZK} z*twZvz*cbrE{DbJSv+KC@||b_dnYK4mT$km{^w)%B??Eft^e>c$$?k@g)cycz_e|2&Eaxge2(9Fj#4+lSC-HXAXclN6L z<^_CXz>Y6ozv*?)Ui8j-{ry*iMi%qum@H)FS=%T4AN%;{XZt@P&Wtz@!2QNjy;`ru zHm(|HecwFkHx{^W7Wf@(;2!Ih3*2vSeZ7{+xwOscJmCvWNpsWZoyy!gYzEHQWx5h< z#m67M#h@8gV}Jsj%aW84BWfy`fGyDMb$EA3hSzep2B~Oi$fV3v zyJ=2_viG|vt(#mR<{v3?jD-_#?&aXgT53l{>3#e2!E4;NgYz#@8{-Ck+ui>;yksfg zerx;J+lo4p9drlgUinRbfJB$o#WL7q7sm(o#ZulxZOQ0O0XaIDv`n9&Dn18Qd<=Ky z@x?xBQ+ToWhVA!`&(BWS{>jk=C?n8KXZ`)&*?t#G*v=Ya`cSmqbdlRZ0j`PeJ}n&y zyMK=NhLLvt+b+BWZ(L}3dnhjmjF&BZ^c4sUt50IIA+fh}>E5pI`Uqq=hHK+R@7?Li z_3zGl2ghImy&asc|M|^v@8yY>_Tq@W=pJ<6?!UvA^w8mR$K5x*>%V;X%|0yq2|IZ) zc-K9-nzd1NNdHd!5t2QRYBj=+NbJ|Zt1=-Fi09C}F++w8d6}0z=KDC7v z=wU$jum=7*rEu9acbXeNV(?zc1U_|Ltb*qy%_*{!2J1gk4G-l}OvxM($(eApT~8M} zH*5>J6tCsqjwy5`E)}BGu4JV7;NXVFM5l_!7%(GXz>F`6CVLEK%O()O-HjZtv7Z_X z+WJ4%|AvtfGaT52og!a=zN-oPq&y!@5uXQgjmz(7V(7_+E=l(g&g<@U+n%{e+-0?2 zI&7M)zfYDdUGX@-6zux~SzRugIdBls031y>qBF zqH>rUsv#dIoaa1O9Y2FWF%5FdH8pcptd|zeKCci9V!A=e4SarifvDNgq1gk<#Pjmj zatrnaP7Do$xy-;hu6T!}CM=McZBvb; z08`Napxrxc!C(uPENS`O_rX zRA6U{d^8vZs5tOn27lGq$o?=~ZEo-Z_{?~DH%T~eI;0QV*Ix7KZaurwEh`D~jj3fK zcQ0#ibn!!aH&6}sel6i8lpyl8374ew3WR9pyGHVyX3f{mRn&iocZ#6t3 z`2>u|5T**>vsfg9#z}ZM)WTz>UA;ABo(_3_69-*m&4=LZ(P(k;L!PXE7{a$F5-k3^ zZ~j|FZqucyxpyN=RO5yu?Df&bqdga|R2D9;h+;)KwbIR~42K9@LEw!mhF~BVXAG+; zn4#n#f>^ZsndtyQ6a!y@9?(yc&XdRrqcUcpgHsmUoZDY!LkFars} z*jA~Y6U0O5P6RwgLDx2i13zWVDkNq$f|oE8P8FjrfVp()bzkHz1u5R zZN2`u)RYrxmb|y&9kv~%G$BUR)r)#JEx}GBo>8drrba1`i&T29D%enMoEGjCoFdq+ zQT4)2{?`4SN6)_221zPotg0W8F^qBLqinj>`VNykE7_B=Hbc}f`Ayg0e#IT0BPlg@ zw3H`U_^LK^VYo$#hd4N1&3apot>g)P+m7RfLAxCZeIs*)XBT{P=`uH^##5N!3x1xnOFrFbcl)heZaMQ}1s@_SDpCeYIy&J3i-gq-6lV~TQO+&bFnf+I z)Wa@L&MA*=;SDFGiMk*)gU&WJ)V+lU7sTN<;#ddE*6)k336%jQLv>qD>M=nW&lg2g z(dsCvtQ4iBKW~V^+)vSZjlNie@9k@D9nec9x6Zz!o9GYW?Bg7x>8rrne2 z2oX^H;O0Wchujig9sgQY8Rjw#IdsWo;-Gvg#R8}D#bI`9Vq_y`Q9`m+OTv+P< zKBC_XtY#OnnguWwUv-eF>)##gvWQ>w2Iu{krZVDVctk63O|QtR&s<}CFym3gx-jYI zC0^*`LHGP8`fn^M6|4(aXkD<{5Pfnge)h@FTupjn4`$b+aakBDpkD(Dgy`l|WNT?h~*eXi5)F^mL|Ih*bGm~*Hb@eHX5A8tAe(_wVkm&QN z3_w$_nvT>xkd$6`ty>^poV6-pDQ`f|pvU38t!bkXTHJH`K1 z+>1*Je25YyKug)Q41jnX+jZ8*f|$#nJK>zApzJTA(XGff^c z2cjB>@J~%PeGeKWK{m?n#g5QioB^I;adS(pfL4ZG-d7|Nb1Fh`2$5*=VrH?yj+S-sorF0W}Z z?q!Dd?|>_P*|}sN-&{y->n?x_4ck^teyW)zZLy8#J6(S}uu7zh;dXVCzwZqO-TvU> zx^p!gbbtMz`=xzI>+{CV=+tU{f6MglZ7uECFY4fCYO686@813LzW?)3P40V{Bq;a6 zKg|;Hvcze;Y(2v*6N?IP>NaGfh{ygZ*^FeWn%Xh@Sau@%9$%_=QPXg&kVr_LfOMy( zlsAON;tl)rQLk1dGIQ?Q0xb^33#fvHbQEIZCx={50-~Z^qpdk#fVBexP6$mYgg#@* zBgblTd_vcjAF*ri%C@9_>Y><@I&XZKtKiF(oU%`R1_5|U@ehwDDX6B=NBM-V31DNh z2GhsrLLo*lyzY`%K&Q_#+HsfNUK#vk)-l=15AK8^?9RW!nU$MEQ!*PX7>rc*@z%1@ zs4J+%jI%XikRwPdrgV++pIW-xT3pN33bvgp&<-nFv-+M&SvrYnI(Sjr zUed0y6Gw3v%L}YnrOc9a1eF?iw8tU?A{zZd@G&RwSG(wR!zwiMhEH&xm$9c>H9KX` z;!%15;e}^_OptUB)JqgGRhmelMwiWH5D>5}#7Uu~L&TUI2JJVFqQR>77;N%{nvOmT zBe2lK{~$;8Z`s*&J>P~(HiR^cV8#BCB%27Jv_Be2-;jR}qj?14SAS}?EZ{g5>7ZD8 zlo$WQ@4e+B4CMrJzR7Iylm=nzzW-4{3s$(mmT9C#t^`Gmbwt$Ui3;YQ@MmmA z>`M$C|CVxyEhFza)8j030fSy{2EmPdQUA7oM(J-#iRWrY>fQ%8!k=z?YWm-G2kMcF z>rBVUnP!nwl)n0%eSFjPGzwqxweY0JTgRK+)%@R>>Ool~2P#pZ@*Yl3(7dZS#bys? zb^*N~BrqY?CH&fvJ9|tU;=B*>&({_MRf%OiE%65}SrP7R{lKg4M|H}me>#S^tajfE z&dTCN1?Rf8vnC1H`=xI&ZC+8Hf3u%mt8uW(nJG-`0%ry)D4~YE9Z?26H`T5M$)EfS z1RH~~-%=h%nQ8F&v$}W+Lt;2S`mbNo*Kc8?;UhiM%I#4h$=7cSZ>b)P&c<^XDkz{@ zXyZqox3pt78Rpt`dwE+5 ze565wivLMTpaQ3jfwR0w4JKkDLR!>wqg!qemSrwR$#`@rCUNu`6j)H0C61PJ_y`+= zskW0B+}7wT9TCqNX5!`&gXaeXZ91RATweRPq^lpXQqlL_bV|VYy$2Q4uZE0ydMD)T^4I)g(#U z7&N0?nQfE^2TWSG5u|{)m3boO8_l?H=h28d<%NDuZ;A_B_w;eCNM@6?MtZiq6Mi6V zZnFh_Mp#}T;ftm`fI$Rf&kwJ1BMGJFGH=Mjv0%czlfLIpVI=Ud!(I615k4yL!)8LzjAdB^Q&|1GCrI6-kl8K+gyP|fnl-L zI|qj9OG}2UcWm`XHsrK2mbdyVJP@WY1@P=t2Key+7tZ4TjpvHuNtd0q+2Eqn>z=ev*dDv`xf{&j zdj*qlN*p;-hXr=mS^YSYmdDgPBcIz|ajwayjc@Alam}WFnw?bpCh&Z9;$akw4!v)| zK2<}_spXpB1R!I)~E<(J*oPnr< zAMhAnKs>qXZ;(Iw;!0*qeQ{;!G_QMeWw_})4?aJv*~Q#E2Oy!~c=M6(aYv%$f(^h) zPGg}8Mc@HGo<;DGPx}CS@UAU^x#d0Qv zwRAywS<3~?k4YwmViGRquo6O@n&LvxVJhtK%-MJvpHxrdAU~1ox#zf`rzLD2=uX$V z<@eu4Z0_0a!Wy$B54@Yzk8@PSjt5eCi7%JbE`BnUaJNAM@n1zSGK)F#4sS2qR4?)^ z+LxbWf?briG88^@^b|u0Qr8w}d<1%IdeFa$;GcK$j)UYa+ZO9ja&rxP@LAnG-Oo<) zoLl2DTnSJb>Dl?r=r%fAruFb(p z3?hfklpP6%GXmO|8hy?T>~FgaZqObcd^-#(o5X_6`gqH93(E4xFjK z|N388I~v~bi1BC$E6~f%Tz`U|Qfsj33T!S34=OFqgR;PQEp9!Z`{-3zdb1?!!k zTo11Lz4jR#)Lzbg!l};$DCgDxAh$u-5)Df+ox$xbZDlW=qI9=3v#P_3Gs}sqx5|43 zQ4crRhAVsjEl6h0=3rG#VB~Nh=K-iT3tqEfim&f~f7ya}gQc|;qt%a~0q*}`fp~}6 zdsEMP{~c%n0S21mijfDC1IZPk?1;*zncft9iu(>rKwd=2TQ*^XRR@R;@*#ld(vz+Y zau~F5BC4eWwsB4c>Gwvi$GGEoEVz>njvGZIIu&4p3d?i7DSBSy4wp6`erXRJFJZIf z2LqDydI4}f;A{5O_ZWAp8m`9)3<^h)8KVtaj6yvig>X$UUVw{2FGT46O1R{!Kav>m zSMSsTx1Zo!@KvjKYGH(0nAJPsmIFi^EzZtt*>*-p!`y>KqUJ@iN}KC=1HrFm{zt|% z_sJ@si!i(qbK&0v!_^OCdL8D>ePy(ti5uYtQyGH((#9ZYKOf8;Yb;<&_{>NI@E_1? zz6;-giqU|xvO|^jb`HkrBv_K$xwO4l(xC(dSqE#h969g5x5bnc)|^|n(vV&=V5~Ns zWuFh>>@B%3Qr#WG4F+kJwe8T@6YTPJ&(I7F1cxf67+%w7g{v9_aLbfwlFBTKXT!}! z6V_h6n+dQ{T}y@hpQtb9g3_RWgmc0s89bhJY{woyX}QCxWdMElgF2 z3m$!oxt_ac3;bPyzl%bk&WY_m7)vj4XG z`m6ijpJC|ejr45r?NHS(7og*_RzjF~v=Ud%_m*?-)|+E|247f;=_0uQR=t2G?j^hg z5^pK)e+%9v$P$c5l|&|-A@TFw4dKGwuv9X{&Rvjqpw+U+0d&-*F|*dhepj2d7{NHK zjbLQ_(-sr(Ba|8o(6|jE7KLC8EmK=Q$%-xTuN!{LDUKY!tpdcJvW$a7nNw6mMcQF) zr|H*$a)EX6+#s5InQTNUa+1-atwc-giJ=}YU8GEaI?M)IWE`;GL#x*f(4K>{3R)(} zSR1A-@%_rp*e{wCr`;=dF)K@s%AyV;fQQaDQtSAt7V;_n9;pcz1+J!=e#Kk#|To_714UzFW?C2L{t`#M{~ zI$i{q#!(+IuKqu%c8l<4l+53$XRrQ*_OAT|j+QTGz6=CN?&wbN=o{1zb++EQ02e7^ z@7+X!M7fV&2_}>2`@a8qoqW&byUx08@MH%sdzTqc76tp6;I{CCv79j%HZlO~&+}ug zA8Hu^LY|{T6ml@yDDSDSH26qxAovYMJ#{PyueiG5>IHts-6ao8=6>70L%oYxGdCJ? z3oK%kmw1cy&6H#~8nqqvKdZAHj(#q62yRdMkt&ZddDnOn=IEe0L4KS+JbjqL>c?A9 z*3+3ICP6%z(4X`BZ}HzF$GZW)AO9IF5c_~R8Sf?#J{iXhO-d#7;g9=2_@3>}WiavP z;3scXv;d+%W&4#P9`2}jYIv_*2?|V2EJ6<4A85W(OP?-)O zKtc3)WESX|=#W}+S~M)@27T=!aEDDJS|h_tw7Gh1xzw^^{!+Ue`q}n7{blg<8<|}FZFD;6i{^ zoUV-WZpJ=|j=qPI^hUtF@Ifv(Vx)vYGPaMXOz$ClI5tULHcp^ALf0$>f;`jrB4zX$;Bxt1>!b_^qcaysMS12N2Ag$nem zQ+fymg;nr|;MuuL$#q_wW)2^r$l@Q1bA3GkxzzFYhG?vq_h%Xk{q{)Qd5#YCQV2$> ztwqeE2S@rznz^D}R^Y+LVl)^Y0Q(hBz*H=kI`2ZTNUE0_Z#7z|rROn^A_!JI%lVdx8d8Z8JHG)jzG zB609S1h6dn1+Z&DSFHZ;iMQ9w&d}%@8Gt%HtB!^4=Odvt8Spa+raNDbD9RURLy2i@ zKjr zdv*1T;#y_Cflg6JeO|3V_UFv!)0!d5uH7W6lmqH!Q|%&`YWGOqfKZB`RkA9O7GkLc z;dUPg4jaUd84yL!suwATBdV!E7@$C~IXQ=fQ4Z+#%&>}Nr1Dyh1aTFr+7qsxiUBwp zn2!)`a4BU3FT=Ge=cX~3FSPL4!d*K$tcFrOYp?)%3Urak-ST1kTDP8|sPj75KLZC&>FtK{!~5v6mw`ky|C zT69ib5n*VZlJXNF;p%8zgKCP2ZS9a_qtAM=lON!`qXV3;IBq@gp zmVtCjR*C`Fw8^T@5grxF_+o4T6}4Gj8k4ipr*U;4O{IQ{3$sRb zp8Y#;&C#8>%1X`v`34)Cyh))@AHvw4E0lu4CP{G2-7 z@Ww5eCf?0r&hZ+o=dKvY{x(^Iwe)JiyrlIQfEEguaInFi2h}wOYo0Wf`Uw504BdcU z4wS9?eD#wLw$6G>V!(QDvLw~V96i|8J2=ST!LI)5Q??*uv&H_aHoCxp1)~c*SkfeU zI3{M1F*PbfZvh%$y*1bol|c2?$cXbriy>S}71k}&`iq59t@=(kx$Ficc#|2`B?e$r zG$@&sUb3a5#UgdS8cxSV#a4sqG`qLi+AP0S4*&W(tcOzsrweO$esmg+{`Tk9@AQ*Z z#eCv#f8EqpF>rUAktPhlV|FE&k_1^1G5ZNd(~vFUR*@%&he5B!*{(~Sup@qex_ccL zW~xwYi_Ibg{V`!DxxO-vugd!~aG#rP+FK4v60ziHM$Z|V)9=4)F&DvQxIK$o_x*R; zh<^V)?&>7;7htEn|8CQ*G5|+!QiAxQBt*WR@YU<6iZbFvc#-LzNFgDfN_Hvyhvy+`g}>qY*Jxj;HEYaIMwE1tUF@su!Wiht7g&S zOQIT01kTRtrmN5`zM>+5Eh!Qvh%3Zz7n(32{kZer=xls#a(A?S|*Bg8&+GLM*_5?s*X~%07xHUN$(-j zoVALq4CLyY(n6JzG^7FWzNrC%qENI@rAWb}SfLC^_5G@C+AsNw;bIy&C}^d~q&DKD zvbyPPzT~%J>WW(=|CmW%r$6i^81+j@9vc2@Q3?qlM)DPWsUz%>@uJ_R7YxzKTVA?y+2f zbWEF?DN9;t2amvZ>ZPYwy^b+}-p&l$pN9fuJ9e%CKYtAx7*Dag2~GCq<;mV;MfPfK zA{YZVs99me7H*KPl_B4dih%918~mXPh@H`4^;EtUjyeeY(2EoT!j-Dv8?HLq(JFu* zQB@uya`COp*Iitj83#ud_EW+)%(x;Aff-F#?`)}j#3zxPVZ~wrxQP}FAHKI7BX2~I zWk6{(1c7g@dc24RFI(mhW5Ep=s!cgT2I1-le}aKZOxvbW)nLLgJ_qlH1huRP!9J1= z#DFks5@G(3%ifF2{lJz(3Z!)d%m$M3P4J`DA;z;fhfMx5!iYW8OGI#g=Q$A#druYM zrpapLdGL_{MazO1`l(1!LfSWqc_ACh>K!4B%S9fWAV&?LIhrh5T_Q*?8v$+gqInE0 zM1{YO!PgknmX))_VlbFvfNz*w7X$jRnatZ|Agdtb))Rpy@=%R5WS>$KAhnmvY?30w ziAX$Qcx03^R264D1b->6Gs9jJULbXS9aYdhQu*yDaZy*95H~|mg&8g8$mvXCwMD|7 z)Ff@Ovxt6{fxFto#|Zlk4+zizB45V}mva$+BvXli3NTxSui!}{LjmL>tw}w%_eK$a z0}0D!xt=owC*cv6I?T!CRG*2SWPkg!Qj>rCD;xQ%|Hl#u4Mm}H(P-_X0oB-SWiCNdDZ@3hILf&ha|EVh zf-B8#Ez-(a1dV+C5u%|)@byQGK6{6Tg^D92^|SP4rq{=S%T!}J<%Uwoo1-oT1S zXYB?qCl(fowA2b&j+ZbL6mtPc%2<&-QjhH{Vv%F^7ewJNt#IyvN#cn6Kd3)84_NF7 z7E|MrqBYgaz$n~0_R)2N$}KD ztG^5@ND~{LLIZ|Kvx=PuE8AAs^7r3iTbqfeGQ#k|k~VH$sI{ZhT$}IB9MhyKr-E+uL#NpUuV_Q~Q-)N* zE*XVyie@L)v9{Pb%HF;9&3N^LJ5n!_$(fzjwDeUDDAj1HBGAPJ*#r`00xQ;tbayM< z3f%x-DGTy4pRNHrp>dKIQ}BgbPN`b7pO>&h@P5hbT_?wD;GQ-u#LMf^mJ&~%hiwtw ze{aLYWDb6sjRs%@jX;W?H3t9RCBnoc9>Db<;e7R@Z!IetS?#)pZilOyaZp=ny`l?u z9pRsv$t~_y7jSCj>ff8}+m3V?6>6ZjS(eraZE-tC*tKXE5)qJ+)R6%&+6~SqX3ChMr zumrh@TWn@hJ0&U*X+wo4h3>;dZ8i<<)3Zen*@F)it&!+4f}_yy1k10HRiw zbG(&ld7G>SPooSHs~o+#^*Gwf9W6t%O4e66;#kBefz!%JkC@|SNq462jjKsY8_j!1 zW#)DH>Oq{1rpiUEKUFMgBcTehbe%csNJp19ilyc)jZ@vIK5_FsiDl*?x8VqtJg^7r z%njrun{0kH(xI&+xkel-S`FCl0xeRO8YZr=s^Ex;NyCw01MCR;=cCTWj&KasVnJ{0 zoGqtFcdgpfPO0}SNQAi*mhQT#>(xoTwqnPDWUbU>Vak;+a9fnaaE}Kz?9kWFAb>Mi z#gxxCgj?Vt2d<>HtwZT}tVrEiUI|@XuwLDb&9rT_EZSHIe7^r_ zHHiNR)Wd#eT+~k8=VTuBQ>6WNm#i7j^dLkiMS`Ee?9D`z(Gj7rf)-Mi5t|siig~6* z(pmUw^+Om_KNctTAkVZ38p1|(A{kU9cYPt3A~T~ATId0cUO74NHsZzO%I`N4TQ*cq ztEM!>#H^|hd!cYWgM6U|&zfm-spmEV!+g1?iF(j7r7nty~0t%3T;7ftTS?9hFG&D=mhUP0AOIqY?_eAS^F!dhu8n)Wk(?M!AYQkz$}O zrXVKLaRPROjh!(Ahm0lPSc+;Dd!6TKF!9~Ezz61E+V2ljXKp^r#vOX7`wR{mOD}vx zLjz-Lh|*-M8cF`}(XX zj*@nH5_A7a7d^|1Trwn{Xp3`%V)zLg%K+6OLKNXx6aQ(8aUbsXYz&rN;LXMh{Rv0X zXH<=iH^=)3Cf=Ce#T}W1O|7lObcQ|O7wL~D_kG*Swjp$@ixMja+-Fd2AsF_)4KsWU z#>y;8YvZM8Z13;HjQ&{-_stixVPx*1s=bXn#7whYlz6*6vk^naWxB^XObrwY+SBHi*A;di#!u0 zM`I*=1B+cLt4?EW{$%s{HmlDtJl~2Mi#2L63>@M{eVQoX2{Z1zY-2m#7u^BOIEllN z9o=Hxto`2C$C7om@U@hx>hpP0pZQ4s+|8v*&74cMsa>!)=E9F9rcZtc_XT!)&;k%N zJKSjpr>F#@dZ=E18{fcYBxsF@%W?x;BbLj zlsf!^F$=P55beDEJ!n)bUTJ-5vD9-}8w6{>iWb6vsdG?ec%uX_7~Usor~)B}Gsjy@x0lnzb520(3#82UmB%oiswy9F)F6FZ^ z+_C-HR)?{Vigs!yzK>~)Io`-U`jOfRZC}z?`+J{${8`>+)E*sNifb3X5Y&|T29_6h zBe94QXOH+NY;}v2Znivk2ycdnUAe=6jkbNVh!OYMJ_JUaEG{`JqP1 zcU*f++5&84rUotrE(acA-xZ z;~(*Ywh-Jg(Oh-0k+yZGxmh2B5BCfJT@&vsgTFE@928YUoT?}rXzO(&FfrvOeK#sq z?O;l2n-`RKXj6Thl4u}yuhwiJVj}&}wD{|TgfUn3VODf<)C8aZl+(6w&=>fcLf{Q=J%||o-L?`D=@wj zHWg_YJv>~+w3JJT(C}qFjaRl$SOqbO5(fQ7eb9N z^O*#59HzqX`4r5*5Ji!d0Nx4xa3`z6$PYr}O*l*Im$iSWL_P1pKdQ9d1R%BA%6eAJ zoDIUr&R{{u;C_T0)B95~CFCiX5^hK_rlc{bK6c7lq?8DPQ?29;!fZNRAzCtVun@zS z*di=$r$m6kD|;o zQeZLldwAk})d7@-3oplG&>J!1JBR>pL(gqqeYTdK0|iHl?y7g%`^Rh4b{!8(LmW-= z^o|%TuvF7e+HySYbo-lWJp+8{%|PT=-BT2=JX{k*Zh&*4nQ_;3Ef4MsHDP{|vF>Jy z=`wH9fte=**b*uewLI75G6>MIs+P1h2A&BDvPSWT?f1?~XttWfC3ZIN9WW>Kc{CmJ zaXH9bF>e#qS&eLjzCgKcsiUkcgCwQps1q&V*ito}rJyIc-`ZZNXleGKyYqP1I*HrGRv>m)RZMQskOdWKTYpX84l2iIRd+AtzaGQ}j@Lxx0Z5GyQ2 zmz(Jj3md6B!?GmOPn6=+c_^U^Mm`+swL#%TL3DrI*+y?z6Et7Vh!D5hUu1yoIhqh6 z?zHP2++CH?QiM&&ux-hQdIznQ7u)4jL?m+jMK45PjaNSm!$8e%GiJw~F%NC`5M~Ie z*5Vv8U=lV<=Iy|IIE4X@@Ni(~N)Siv6zxo++{VxT63O=AYr~3eVUI}k?d;&}#^B7e zR+UuT14qb&$0;A}6xAf+*C}m`Jy4X{i9FnCkSc4a#nz`xKBLV~Q2Fb|s|=WD zja)=D&p=5Le^Rb9eWss_^Mw1pw5+jIJmg^2NW6|QT@0$Vz!L6qtyIA7yx>6PIQ*i8 zmEAk-3orFsmLDODDzOg2>@11QLSebD-GjM~U76UYp4u0*uOopC+xZOMi%|Szxa2v@ z@Wg^`tn5kUbA%%9Y%kqAq^cZo+pE9|Hla4yc-qZkOdSQ{Fb z8O2S(A!BrOf*a$EM zbgBKs6f7ZoxlnvS-W*;d+z--IzyIeoqA>7BGh>kE4MaAmvd)rXFuSQZtjs5gkmXI_OQ@2AcmS9 zt6jCC%pIc2usC|mptdt@ED(ojbD?X zI>)ZgnW%cGqUzONO|>95;nb{=H=t}tCO)ncd-3Gn#? zYh!Me&#>)amBmobY@0Af8zKsi1_$Z9 zef{d7fR#jH)DLXbFIhY?VAJ|Z|`pxx_dRnyJ|FHd@>rx##P^}FosNhVIx zW$o9OXT5fx+8kfD`&Zpg@3Q^2cX3{7H3vSC;C7+>0rmYHQC>s^L(QM%tRkamfMEHk zx{~3jm}Tt?JJQ9G8EzUR(m}A+Falo&haaQ<6%LF=iegQvN2)^Qvzem@6~u%!GKD3= z_hai^ITVENgl}=M@HW6FEks(zWnw}z;CQYsNi7t+6xX?MZ8wFD9UpyiRI_jem}0Me za>>rPKN9ScBPTDSgxb=#{E9}-#z(pTN$n`}armnf9MsU~<-R872UWjx&x(T@RP&!2 z9@EiLWoG@bNd4H2sg$!Wn>J_gs{+ZVCAipo4*H@hl9{n|jNolI3 zf~t$ptVfQP27#q96hf6$uxo}9ZEC3FRU7W!>A2dw<(X<2hSL{jB*xJfAw$9&^5B0t_ zAQ3*S{|A?SI6=Mh7c2wBZJ(d8H@%bYIlhj%zlXa23Ut3w)oL|Ks5}% z#>K&59i4c=RlXWyV{x2?43r%_Lu4k*EKqzCL4j*e#X2v+wB-6# z=f&wfqHg`^S+{p(zT~cwmhI%jL3&6POr%aejXv1ZT&g}zwQ0^ zv(=CDy5M%w%(W~c9w8VFbYp5dO$tkaG*l&dBcGZkxp9-w&?~3f0tB@3GD7qb^E8nf zYj8WamR;P^W!=@X7;b&KEzz}kPt<09!nON^GDaYXK>dJ?AEJ%VHC{(G4b!wvC8z(9 zu7ZJlUC^^v(qyG;cx5-t$xS;_UDt;yP>4SG*XhS-i51o zfBWm&-jIQ6jHbgrX`isKKkAFn3v}w7xYTTJfBn(=Ufkx(-BfL4sz_kuTnl^<81OrJ zp%Iseluet*cnllQuz{a?eR=U_@V47|dERTE_qr@eTV34$zSCnp_O^Z5eWTxm2mA0p z5Te(ex81?jd9QQzrft-BHPFHx5n94inw-q ztA8;Za^20&UClioYkndc3+`b!j{&f>1B)=SZn3rc4Y&CmWE1@JDGc*t7V1qafi6Nb z-)UBGHCc=oAfFtEk4DrGS?c45|JL|`4bQC!oIrzVd>O$sgir9)>|jv3B$ON*PQ`?* z{NNS5>|b2AUtquZCz+f__lm84ch>H|=;iOA@vO&A=J?BQ|9Kzmmh1Bu?3WkkAZp$! zWCsK3!`jz_H~LL9xSn^fy1ldB3;5gUOg9&(4Q=Evg$?GK)IG$Nq3p)aGy&KGDc=7x zn!FQGh#4EHboU-?QX0 z3PWGP;mMbU=>;DK%LjN84SBY^i#dFCMNrXyf3sNX;@FvY|E+)%D*pcKe_K1~V4_B( zf%mrc;)X4`Zw3H`O_pazApI>Zl7W23hY$at^YTr%e|dfW>WY00mQn!QuIzmWS=HD} z;qxdMdb-)Va32wY+nKW-`==sxQ17UHGFLUXDU%0*MU}rueIBNQ98s;9Xyzj4nr_82 zHDEZdvoNc_P>nRM%?Zl@<0`EF0?WWL!{K^rozRQ7_?F;j zo4ZL3S4PZNKa71xfLnm17vRr=iJhTNaghTf@O+}gn97j3=tCq?@a#`OS2O-rUY-O=Fzh8~5f)^C}nL zHU~L=ll1x96^(V}h!OC4SOkI-{>#d>?x5~L3u06Kxw;|kA61tr92G0NPPs+NZ$#vT z2!BhD7lTr!zDinXz=KI?ga`k|acQA7elTzBzN7+{9=u%H3B#C+x zX$n8ndac=vs#EneLYNLQg%mknkfL|=s7aZ~BQ}^+tw|4KT>S|=v4~}96IXx4A%pGy zm?@cXCmT%ZLpi3m(Fn%GmJ#dAKu#1f&r*`X`Iu>$(1#7CRR?dY9pd^&&Hz5Q+6pQV zjlCA(*t2z$(Vc*98?5h#ytfmDKLgK}W!S1X^B2p-q#Rdju-+ScT*pWIpH;3mmZjtx z>UBF8sy`SOISabAb(Cr|J?Q{kZ3-yAY2SUi8IK_%W}k9O;&@#9xkWW>wEvv|R#k7|yxsw(}40 zx8-3D(AkrUr3}_UOmBp33;Q3aZ|>e~ya*&V1x%XVaNG73eI5flXzJj>MUrQs_;DAN z?Q-DPBiGW#2R7Emm&Ou7+jm?#$y2!=);M5rlZW_gNt;dhsUVty8x$_Expl7CPQQ;s z5%@j+ova|8HZCt}E%!piC(~h5fWM_GiwZ3#0}Gq=QRp18oqRgg{PQxYQ9wXE*4YI? z+!k9~&CeHNwl7&~bxnoJZb9RUTKI&Kt{FX(#)S{u;A4 zy0*i4p>^kFuz(xMF@zrP+4fIorCe1#iSvMXDtZx_8)l8YUEH7j&+14`3SB)f;Al@X zeS`2K=$f3`dc%gFF*~8k;=+Rm1%fNS3E2o^ENZAo&y@_YMSt3Y#F>h~z~4c2;pq7$ z-(Dd^&hp5UP%76;T*`pXo zaSrN(re|_^T-OUNXGp57k0Cm4--7;xH6jn*wak3h2 zq^G-!>r(z8%lx~8hJ2MFOBLTOT6k1BRa0$Et!0$7%7s|79d4!=6vh~ATn`7f7JMsn z$OI&Ninl3T@rXwq4mmbKZ z?YA*-t$(of^wD7@MVb-V`#NoM;6K5th@6mAqq=~&(Ae$Hu5D^* z{H)Y4I2{(x%4Lp;9Um(Is#JyvRj+ zY7`sA#g>z04{z%ckx!~Ky?M%yiWb1CuP2SVi(SZZnmPX8r>EUA5o_a58Ri$O&qm|} z{>U$O3U`3@J=pbK(25-$SF{$7;7oYay}~x2pWw@zC?qa76q#9P6uZQGj)uacHGmJK z4UF-9q+cao5I4eZ*%1KH|_J>)lLvEtb50u2ru6D1!uKIc~taq5uE#8rTZ|lxYCG@_?DqI;(I`zCX^HHEsNsw0Rjt;S%Mu^f+5y!Mr zsu-1eAq89vk#H&Qr|&h&s2TG(Y>8{a|}FCtO^?>Q$R|FDmEpRlT624Eu8-7 zP+^*OVbs?MA2jMe%L|`r)GBsnDD($ zgs-$!!|UR>>G8e6#HoAQ?7^#q=f&MH^8tB26!62ots>)y*Ou|GVOydpsHTybWe33Bol&cmR zjNJs|&f92Xj2G)6#yqcK=_LIpl7bWyDj(SsY0Bvf)291@}+k5Qwv zI5{Cyq~|P{E9RN70yw&et=s;HYoG%M#f;+GS*Ps}y{4+|pm;)(WT=iRqM^{) z*MPQHsvb3s&_ma@@G+A`TtLfw9c4U{|BP6mG+94%153i)xSxbqSfA)l~3c z7xk`(l~A!mQXEwn5jq(|_?CRTfVm`ziHLx0kNBVx$$nVT=9!RCbj(xI|NM0%})eMV>1yy`p0;q`$skuf!tNp}ABG z82%K?2XQlf`0KFkLHvywcr8dEPl>1K;W9My_DJ0_6^!rKx$JW)H_@~7Tu}jbZS)Dl zrA2kwwL<HIM`Qzg!A2|$mz@!Z@{q9RaS6cf^p7$1%>OGMweVa-=>|3cR{G0tTe+wqjBpliru+30&WUI;7D z$N$FnQ30oD4|f2wtJ=@wkH^*6gcR=zI~w59TcXf!8>|mvF+#B-wdJY}odlVe=xZ`! z<2pA5-w)EzDs8O{&>Bz&$zhbDOb`G{wQSwet*MH9R;;8}krzQHpUPR))Z!pOOSM4^ zW&$b6;dIZ&Jdk9$qu~oL6(WBc2^OzflxHCE<{+Y-svja8nzogMjfq3c5`<}ELoT-- zUOTKTdQc``MYAe(Jo1x6VUh1Klx%s-hOC%29ZBiX_+faLfWH+vVB@p=v4Qe z*l2+OM<>xHHv&AG7?;`7q4G*^O_h-ACR#fv45>8p z4$FZeA|@CL-_MIy5S&=4L?PyQe!8NXJ9ON$SA939{h$UvThjY^!&+qoK(^z?xnbsX zv@&Jjx(%J`y%nF8xDq_|-nKr+n#!evqMTQCj~YsWDYc=OuzE-lsw@G>=B*F70^t-_ zAZ243sBqxR;==N+TSb+b3T40N-5i084c=(cLMe|Y9c;ir>qFPMNdRsA`5;G|| zq)^7R%2-xoELCb%Q&qR}7$OTS5J!jR7I|F!)-A8fnzW;Q5lE(1jwj?ZdE$hmERt%o zG{JM2%$^oERUfd3Y(Dmw!>tJ{W#VH<_s({Hdr^I4ozJx2ceOs)C ze(9(H;!v}WA=NDwGro+lWdsTdsfus>TxL!47u_a3*Ik)0I)BA|Ml;?{Wel!OF#JgufH#aNa0P3 z-ZVefYHEj^ad##7RL3*QW=Foei1PgBsB$r+gP-c3^zg1%Sz4JdZv1uWR~dE<>q>_U zCW!Lmc9(8Bs!ToR;<9l?!JFRIOX>!HdeLX9tm&)b@CG{ENkKeqZ8Js6Zd~Ob)o|cE zE8bP|9x_GFX#}K|7LsQ2(!|r2%_(9CF^$r1SCxGF$-$=w&uUaj!B*>g%lYcZTeeg+ ztjIZuJpr&XimS?~QUl!lST{t(uQS#GI7q0hkq}l4Mmj8u@N3ucz$K~ zhTQpdgVUI596rsUU6dNY$9EA;t>>I6OehZuOwuDojH8I(%5opnnqu-}Z_yVzIGcd!z z%y$}Is=CmBF;(scQG)oV97=J?iWuk&KXN?1hwPF;)pu5W$Z%thY6rW(`dHbPqb8^j zhkwdJ<_8*I)C3utewW!bsni4eMLN%7k}74_S=8k?Uk^HMR*~E?Tm|(&kzFpEb;cJ8 zx9LT$t-L9c>-p+`#ThVjC>wgSh>v*=0$)Lzpw(god!=~Mx2^S#59*Ned;58*r>!WT zBY6N#(Z@@+3mfC;^J?@`zPqw6tD`>ZJ`-`$FVCa|TM?v*r1;9<)lex89YW8hza9?0 z=noJGTZ=HYbGeUkMK!+z1M|2thf0T1Q0(r45^yW(Wz474L3HK`Seh z4Bh^y>#~{!MA2)V_>zh*t;OyHTz^L^#yc#+K?Ju0Mihv51?!NAIRc-xawugbasLJxmORe~iz+0gPBERLBM_Cd56!YE5ml|k+D#w<;gfW#erDpi{ z4e)j~U6VJ#OoE1RCC2b>U2pDLOL6}P`1>|k>b!8all(28c*v=?2$uKX;``Qe0UpO> z_2V2|(Zw`!qTBc1S=417COPNU?fVjT#LOyf&dhdP$)l2sJ)K{i@10+F&bsYBJL&bi zoh#PvT=g!_2khdMUA^qGFE4uMSFGP1z$2ZPtkXTe>UY`GFZ-PmiSn1&Cxj(2#18Qc zsz-_BDkku;r4P1YCOT(W9#(^=M>z`v;SY9{KpZH3LlMmxjE(DXjC5;-Ohz7Jq{7vV z+)0X5hXyC3aWG^OQ50ZacMR z%t9mHXhD1HWM}JOeNI;IXt`&^(;DRQJ}7F!)w_|udiS9v-jG;sts#qm`@ZK; zm>Y(xsijwc;X!Qp*&k#S-;6$Kz2PS3InzULx0Kjh2Fh;EMYY`l8lc2&Ho&XyDI0RX z)F}Reu^FHntF$`vZ5;b4({0yMz}+R!K=yf=vb383d^J+k__#md%J%sfjFPz7ZbFl` z4olkREVc)geD|di%GUx5go3FDLKOSiNes&VDiYd=v(Sjs&BGq5PT1|hvrv9G_1$j?-??Z#>4 z4l48G6+La^IWqhH+vxr$cfppR)#t9{cAn}BjlX5F~*L-OS-{SC;^CS&XRFG~zaBw!4 zr43goC#Gx>+5E#oN&!JVV{j=3OlIP&cgA0S_5*y?`N!|A^*x&xF zRu-2Xwq^DIhV+a+f*~D2rjH=Vw@4kq?DsjD?<@BYcwj*{H1s>-2wpZ4V6ve+CpK1<-{2e>I>z8xU{DiTp>jQr|P0cmA?VaRX%81YRA6{5{U z^&a_%;!uqtG|pW&hiK8NfZW%djsf9pl}VspM&$eIPtyv-xN$ta0m0r3P(A<`#-Bt4 ztMIkf`vL5Cq%lsMk{|(tPheIg@#Ta(75q?6Wx>bZMggY3K|S6GS%n-j1Tf#w+4=O7 zPxcS$x^I3~bfFWuouv2KK6~1`-2cTFtfPs9bCqnGUJj&<+&*(Uya83g<`Sf`cKe`w^fNjEeNeFFLQcrO zECMyxJGnT`4D41R?Nj24nZrZ;vaM=@wdt}XZy_3uW3ERom&$+65M0GCF%c76Ipm-# zoW|;pB@@pmZI*!rY0CfmLLpU24P>OfEi&DLN|Rla0eFXbnywqpr8RM)B=Z3#I_C49 zXLC#+fT3<4JUXlaA{Ly9a}f#F^(Ww5g)VY4k}7fW)lBi;_D9={RjA`lZxD@5Wc=}-VN5-qVrXNL_z3=_=RWMt9=lo2KW48GJROAIaHw`l zkGYFumf}|tVG-8)T9Su!VkP*f?h2x1ZD11l3c%{cc|6_!b zJI<3RPzX53ks}KC3LQ_uzOROX!UjBtO47Fj&Bv5knm1tJ`<4`Gc%JzSR|V}$#&25t zqzEDQ$hCFzl{98!_f1+%_q$gY19sA7XKlsHf=vYX*g%ZHFt<5&=aPiP@lh+L9iKVe z>p(hp$)E9tItq50Gmofu6Q|D5tG{3ibB1+!@NJRKkSqHLUQPN$gN!IHp}D5hn@4JE zdb8wi&WrA#dv-SHylkIdo%K2|yX$r7I+gK)t7*-`c4% zAd#D~EM7X1vcVMF;OZUHDuVljsQ?$utQ()quNGL>>cL?3S6^T&U*)C;t9QhsxO!Kz z-I}v58VJrerG046yah*6O?1XCzk1zG2pl+47Zij|jo@8_0b*Bq988+NLb{M}_)wY@ zoC62%Jsss2I_u)<2OB3uAIP)p43m(_gvYt`2XpKyi%c=qb<*sM+y^PQK8K%~!0yY3 zzo;U+{$L_`a~<8(2wT0gBdM}eXbySPsg937S-n#Rrb@M8j!)p})>aZ4kt_;f>;dHk z)5pn)bLHw!z9`scN&L^OVYa!%XlS5cg2^)Hi4(GzyBNGDX$k+j|6l&?|cUu4WTv%79Wnl?;Q zJ_jSh=MRQx*)l2xV&4b{J_a+=`9rc2%4U|4Or#0qNyD_pr9uNxO50<>(2v59D<<6NW=Bu}F+UGs+ur4p| zf79)}J@0~D{9G(&Qed6UZ^6Lz*!@o+s=WI@SYWP#6}kjDHAlsC{}ak_-*a4P;d2sv zZY<5gKU$>EFyTC1{pc@SY!1SW-Twh)ob&!WTd6n?TyeBpM_w@Vz@C_F&$lw*wl+O1 zFTka+9X^VUA%u^eN8vI?i^Ufkw!M6v*FGwnU){z@_$=B&1f@hd0&zU0<2}r4PJ91&jrXQk$d+UQOtn(G@FCMv zI{K{2&7Qa4w%K|6`qjmoHtbIZ1p{{ZYm6MR!33;vkVe6T z?{&bP6cwg6J}LGF6DRi|Ygd)O84xp%py|V%@}_*GVLfCJd0w!za3?9lj!EhTNa&l$ zUbqvnl+>)SOKb zOlNyb8=X%h@$_0*UV`X@EbLMo#vP^*Cdc4Y^b}%!t-KE&&*%0Z` zcrE-1Ogy*|;0OCc-heUPV&||LaggyJ)k*jv1f8X@4`oQJ(UmY@csCUiZ(#O1gG(O8 zB)-+pu~cU)VE971uriJmtN-pG$KEUDF8MdxU}rHg^*Sr(FmGhrQ7Q+QZnB&d&;&gJ-CC#`wv*w~C zN+WW4sx&eoGdLD{5~MzT%$NiOeXud$e2aOCDK*?It4nhWvvCRWCkKbsNE$@?<%`Oz zkbZ{>y9K2G2f8{B#ztz`Dw`+_yiog2qvvp=@9Au*2*(z#nDQv@lNP)leT4J zu-1@7G3HKHeH(N$?L0;|^k6~IN&rhxIKw?cZ^q=&*y=$sac6&=A&mo44|uaN{S1|w z7kYG~y!GRjR*v|F4I|f50aZ;LLS$a3JXCu@j*qnzv$fz;YBCpBvd2*zOz)xU!qM}P z{;65N=-)b)o$sEjy9!oNRG8iF&p!FArX$2r-n$rd2kk!l@_NgsuUSBbI{Eb4i~=*= z^Q$Bj^B^?#aRC_v)=!fuz3B}G-E)Os7%63T;B_Lu0w1 zfePmlIDhqSis%tS;XOU=UX>_`PF3iSC~ELgypFjqi$7(U#Gt`!<8w!|OQQ=kVhSX5 zw=lQO4`2fl1UPtay8X-R^H*2w>yO?b#?w{lr$BtORGbKF(;#eo29FQ->)JFwEB52a zGZtsVG&j=Q%abr_X`dkhSIw)_E(FLrmog;XB&0ywrj9#4`sAQS93N*7;#s$SK6v%j z!Pbp|+FqAntrV#2eDMz#{%j2Eb)q;xw_QbXT*rn?qx&RxbJ2?Mw?Cdb9;Z+9+7wjJ zO!}&=r@gbqssjps`tj#E!*e7sH3MIYOp9nG?QDoOP0shr*71q55wnrk#H>nclC%}p z!bSjcrjTf989=%83vD_wx%U#%NP!e+zULwd{d|nLi;#neSLj*k!umvMK*7coRXR`I zB_3@`;*(PkjghY!TdxUZxy%f6`-Xj8@2}5_;HV7ar#4Sx7tm52g>{EPW#J~hc`LyT|1^LHDh`g3%oh>A3ViXt-38tZ=@^Ydb~J( zoeU;3M$L?dK^@v44`C;<*iV|@#b0EB1XKD$H-Mye2q_dtw-%?FN%A1GKXu2YL-X@= zBW8kFZ~x}hMcDT9lBe4lbkDYJ#DkMt8cotrBqwVU6?>{$rkrGSaC_tnQN?9$dbo{J z5^YwKO(>3TjiiH#uJCm&6blAYk4PSe3q`_>{p5&8*unJ<*q!9ey9tD^A}ImFcwGtV zDZYnO4>=^Z?vBEQKB;SSU*!3zq>jskACMK;wtdtGB#SUeK+k|#i$?1xrZ^r8MTIZv z0#i5c)R-*v)YI89CEVipn6@SrImz~09@fBoj*3v93ou+g?%ORL2#NCsyK2pNSl61d zP#7LJq8SMf=_{wiw2D$Zf%{Ns2H(r553QHHS)7vJ>SPWoyAK`Gq0EBhadten~ zW?P;^UA=svlA)_Gbnyf-0J2;g+n;S7P3xUAwaW@XsMCRyxOTZ509?1+*+hu20XnNb zM5y6ZjL=qPTvmC0%|Tfv0)h?ZRvoq7tD6zLzWVPjQFm@KIRhJ48P-vxEqNNLn1BKq z>6DhH#+E()zct~b0)TzYr~QRGwc2<6`@&${_EnZck^>9FZbG*QZc!84?_@2eV z_lb#t*Wm>=AUYeL!&JJ&Cu#NLH&~Rd&PgI?vS&r;!-Dy6F{QLi?yTNYfk+z3S zAMl2AUUP_sre`30SJ^-FI3Ew|x)wIY7^Q8bFm$2@mF-j&Hwh)+wWCAx`j7Y-jQ#59 zc((m3CFSH$cT+|vg?e6R^U}s1c!j6J$#uxGsFh6|;!$HzF$9Vc&5#OIP{&_5ciV5^ zQW>AcuvVedN!($%*sjr9B(t#Ph}^Lq#c_@*;R|Z$dG}FEqzoBXt{<6dB9pXP-u}QV zQ~D~Dn3J`qkxvH$Sz{w?98JJb%>{TPJ)PFkBm^pFpkP7z8DV(|#0lP(nqp6POHxS$ z-L@jGhq{Sw0yt`Of569o?B|DxaDzGS@bzz z05*>7UiU99yX|wa2dx~5b0y? zCUSRjSB{=l+py1y`GN$@3Dg~@7YN9)IqNJg)Lb!`?E=1-t z*t1(<0tj8t?xdJ2h>sVA0hk8qPhze}6(Q}wE2%b7kMk~gr`YnVed82q;&_n%SSTun0b83UG`E)? zH2}=0D{ji|w+d@M7L}2SmZ?x!g1jp(PHi89hrVI=1#B@U;z~gxI*5jNpd9TnWosKx zqlxSTk2#BIZ4`G_7W21?GGwH_JVNhg|NQ*P#D79yl^ z+p^G^PPKfLj_MTx7ONAMgSU@wGGd_7M6=V;=R+9GGGQk~;!4FYlWD@2?>qIt(&#VH zo2eRg8eN>)4Dyq>Yf^@jWjXI1~9Lx zdgHc*Jwe4*&}$}8OHGh@b$i#w(iHKe|U11ob50Qu4&i7w2S981}7VXndjt-CN_&bgZrJ3LmVf5N!S71b2 z+jrB}Alpkzya~MsZV7fCWWvUuk;JN!ppX>j@jFx$s*iNS!-5E07r9qF%#Lv@R0PN# zwpS)sy*Ooae0wISo7QACldAc1CZUkz7*r=cH1e71LBpsdhr$K9adyim#JF3Uy-^CQ-BK25M5$`X8@bs-p^4pFNxMhghxdu5 zh*#(@xBwV9)MZvkAMr;vEpk!!X(6GV=kfcGW+S=;1v zUGTqw3(?5YturxtbyoA^+}+Kmb6ClGiwKAlS~%PA}hz ziVhI&Nx|c-JrVkLGO7cJ#`uNz~7KQV}WA;#jRz^;WmIh}Lds|s$GTTR;9}`Wk z>p5)mEx^3xeD&ikHMm}iNwCD$DHe5LN!nVkew^2f`7DPEh5RtTk>DeYtHP$ej|b?q z2iU%v5=;89$2v&nn$Xdu$RgBbE2{;BHaSaI!hBC-13IE9dMDct6Psp?;3PVZVjx0t zOM#>^F(?NgIM1ST%0B7(P#y#2Hw{Bf0s%X^$#=qCtlNaV6Rw*zi@|m(2FtP>^9*|x zOY%;1oGGrpDfAZEUYyB9)oSECjCQBfyEyL-*h#P7?Oe47?EHeAUn410A7AaBu@0D8{R`H) zc)j}lIrdcSo&T~sxaz%VcY3RTIcHDdD@u`qFR_(Fnuq8Rf}TD7vfudyb3J#@jVuS= zVkPX7S%UCNhWC6w+V=IUHhasiPu}+0_rK}BPP&P?&V*vVPD_-aW==u!a! z_TUDilb|GraiZ4-vx=jPor`n!6}xQr+u2T-k01O_wj>+AVy}BwaOV2#`uv2QuKu+T z2er$x9U31$na#!YQKhkV^s$vU)xds5GG%f>|Mq9BZ$vTB?e_on*HQoxc8VU@LNAZ1 z=*!W^JfE`|jeJbi=%j^YQUUK|HW`7PaxE3;Tw-cMp}Hi+rZ6TGauxK2FY&+{v27V3 zFDf~n7@djqci$lmEhRRd^as6i=n5=XWpHGYxq3KQ3!&X@3=*MPrs!x!+Xxh|fgCPW zA!?f;w$ZtxU)SbC92MN$<}lDGx>^0g8|m5azq8QZSS~Q`l*xSNxUy3~kT=J7s4>V~ z3ae~l87OBk(59~0{Lj)sPCeJgDpD1ue3Ei#$Jsp$v%tV<+$AI1z@!bhn+-QzIN@AK zYUDZgDO=D-j{J(QbiWk-M2!6)3D0d|-kVGA!^+eT#-VfRAMF3mRp&OLYt6_Xv>b_U zppMi8PZo+Qx=agsDo5jlfsUThg(K=4;x9EV>>KvneJw|h^n9*baK@c?|A_2^AWo?@ zr*v>P(7nV+43AXTY_#pVCVa9(c>o{lU5NB-j1vvX{$s*b@RsX6n<1*-KacO2FRSMw@4 zk532ft6!+Uu@+UZHXMX4FX{2&r}cEU`#G8PODqyBr(cW@s77qyfbtF^syAE2rKope zcNDgj^<0PL-U%IyjeB=Iup)K%>ms9MP!7YTN${!?UIf|0hr25~zi+SRW=>iC3< za`J@;Uw@>UBcR#OvkU-a_G4hnjiwAmPWvb zX9h?*5qu61P9PkoI_b4#snY=3QQx2N(qAru<^8wV75dVf!-A=He%VHU#y}kgMA#?w zA}kN@Zl7GTKKXT=M41KV$ks`4kM?W1)AQn?cXh(LE-cy(?psa0HObJNK)65-g6)f^ zdXb(d4-G=$;Z?#rz5}ULM~{)yjHjKK2kdE1+A#0tR;5!*cf|N0YY9z1D+Uz6$pi-o zT?NEsfZgqn1Pd&5JBix>M7>O{GsQ)n5LXieZA_(+Yb8@+KaEAE#NbSZtvg|(NO3W- zM3o^~eMrQ>a}`AKM|BwxLX=Q8S5jT0(TEj*bpzV|R@4#jd{#a$F;i@OP=alrl*UmB zX(KFnWn5vmqR%I~0>}F~=5?`SzL|EC(<-mYfhcD6lYDkb#cSezq7k z-bAb_Jf=*Iid{C3M==4|sXGUSisN<=Q1%ZarmMgD0$B@@n-J*_R9sEb7}^PwDjjDL zQmW_)pJf;fHEU{N=ksfYmD-BcF(r?$_>f_m&c?@OD3_|GD3u^&1fbqz^~OtNP7@Ak z0~Gy;(V3^>zr|jqk8y7Tua!drORW@DDFl6cVuQRq zp>E$QP(hbncKZpY;A-{j(z-WJ!*8AfXw-D&+66zvQhnbk`%EyoksAm7b!pzXWJj&h zgvG>E8XyX~0W~GHq(*jvocQ`9YxN(YuvNE8hjqKXit3~qaKYyG`auofL_Q4Iy}Ce1 zQ5K5A_Sk@fTZ!Gur#BMkGWACO>OUyq1XioWdj61?q8Tsa%6-G=(uT@CSJ7i9kW!?z z4&%r={iTq&3SwN{J9I-`Z(`C7A=yO}J#L^rf$1$(_jI^YAj^GZzEnX(IYDXt8VuVz z?2e1cWBTk1;luM~4jqGFY!g;cLmmHuXf)t3FtYSeQKww@*k7BI64zjDF#=DTD!!6l z+nWD8C;~%FsCNS=q$vw`47x^TeH$}x-^vQJ4_}G=<@@jKrMUkslNqM$3Ios->$VBA zui;W8^$TSPcAjG#ao8ydxr{5tQ7WYLnqX0gOrgPzhLtmf=iDrr)BW)^XYy^y80A$l_ zzSo(EVy4y=Yns54i2{YC^RRr+iu`~iCn9fX8X?Drs0fyA8=hf6kT%1oBa>j{fx;yG z4S~{^1T-U#2qe6TYn(D@6~vMYZOqmZNi@73jX~-rL`UN#@uE9VQbDk$`hlIdpJW(- zbYqcBnI{e&b93Z6KUDr;dSMLs)Xm6MiVavEtntWHh6JjTR*|fPj7NKLbXI?Cybd2E zS}}lPtKkVO*y11uDJbfzil2aWF-s!_+@BktN7gtXixVEZ!4S6}7Y21M4)&f)81WJz zPBcXfIml6Ec;Y+?mrj<2sUgdAgJ?>f1U08MRujW&dEpF3X!-(+Vs*X9;O=t`tgPUce z-f(Ba*;26ORC4$IcWgqLq)3$@g5l)_ad=z4|DM?%gMXL!ogS5!+Swe>ybv`m4h;UG zssy-H&O7}nV{B}hec(|u+g!mkAS^z6(o-bG7n-;Df}Qj*c?sc~|-OXjl8q>|}Y*+8BDqs65+1b_qwY{y~ z+;nVE%!f66udcf6b$c*4Z@+wvTiflk7v0xgScW$KeR|O!bY5PczZeXzkWGO-eQ`SY z1#4dq2HpP4_Stjz#DJY&zrq5*=iPp{*LgW8u7`=@zFiex(S{|Y0q~U=B*l0s%(r=j z)Ieaj2F&@a@+@!DshX$?jiIj_)jrv{C&ls zAoZZiF}e>9*FY$t8BIjI$ga0H(iwJGaX2qX)iB4>562Z$+U&}>86gNapD)>b0nYSD zgGlGff?3{wt4 z6sUff?A?Gt)^LYC5N@mgIiaN_n9!-`BP7Zlwb123_eBr@M!iUW%^mD;6d)x+q?q4U zKY))j5fZbh1W&PLoDrk%?7AWTSK9Cm!4T0R?&xD9yJ{dvUrCl-A{86N={}6~$he+K zX$A$b9LaKgC8{<*2Yxybcd!Lm#!tCi0>Us+Ui(uo^>q*xbOu%0xF1(;F&N4Dz+Z zxu}T!M5Jyp;VuH^uijB9F)UUTR~U0*;1fjIDp!BZN3ii>Hi#Cb>M z;T>?V(?+V1xbp(l98vXZ*2I{WEL|gG6E#3UP*b(?tjH>#bRK+>A&@BpOfnDFXnO;u zZZpMonb004jhWWfTLy$#v(nS+VCM5Vb|cV*r!H9*EjGoVNPtlkK6?LsumpPrLneL% z>Z`@T*!X^e@7{>zOeW@|2PTbYjeYpGk_2yFjA}q>H_LdO#4rIA+X2(WZ%VeT-l^y% z$_|~Dh(z>_0)y$)gj?O`)lizlfG}xxUSFvOKmN_Sr*!ih)gW@9;ssw0UiGMl)mIoZ z!N9?rcIQiYDH9^;Z7poD6AmE7eA7Pfwb}FT+sljl-}k#G=e;g_Gq_y+%h`GN#f2gP zx_-@`w@=z{I&bkMWG-=6d@-h9=0 zxs51-iS_y5K{#m-bg=BVy+P+?H`B#zut&r+nP^!OUfb4@Fmd)jObdlk&mNINU0rqA z`Nerm3e_ucLS3JCF;?2|b-H~pm7jJx{jHEenTU;kaN1|x0c&??V>KZb2E+8^SHElb zh!g8tO}~rNhxubb5gvUB^M|uRv)aZ27?8>zhIpQ}lP)6dUv@hf48FX$PJo`P-(9VK zSMmrNBRx%6sSS{xIV&|H*k_FT1TSdjEj+5D3mzc1Tm$oIz#?rjsV>%woPxLJ5YSF} z>6hfCO(F!a1mWtZ$_(WibJ<+B98^}*9h4i7wP7C=5JUlTjE+Y`(i^`(S~)Eaz((hPXDI*>!F=Xndja!lm%BA$zaZA6$D{n`6=Ia_opj zuXKn&N$J$=RIr0DSkmWC)5K&N$LfY^L><1X)@cpvB`?8I;t^I}1d%GuuvUM98;%oHJTV8Su!!@8Hx|NiTLV^3i`ALGkuvIkx+n`$+wAOm6O zMi_tT`qCPQaRE&Cw7++Tyjmbw#!fj5Q8!!@dA0_Tq5E&&e@C8rggvhWrLGpt<+qNQ z1?m3~ng$pFHQ=tn%pjwSn@%hiHCxJJo4}WAs8F(xsQP?FMF!leGSI>|}-H*ZXD}Mt7 zsof}OBKxhwe#%9PPe8jst{^Dh6dOnT8T7W4`@6NLYuc@FYhP}4YhOOEGoX-qq=mO9 z085r7Otn(Yv7TU7Ym{&RT^`-hxgGGYlj1+c_dt^Bb6N(EgK~@*IKGmvkL9CDj0a2p z)x77Dkj7&!TITdP^$!Llz5*q49HVCV`9l~jZAscmR{T3z2I-1L`8yswd`I$nBAv8i zOJ8L)59fg!W{zWY<>A$IHo!9lV{z43bYhd?-0-I|=G&3_u4=%??lqddROL>~v` zI>hu>^_wT7{fKMU4~!^YFIV&ciXd0mj%|};ryfuU|tZUfqICeM#x z0niG_7B)y(obg34k*Cd+hgzL|{nVh3+vIljm~=tL8K>7gTER(WM(}eV3w&_PI$7{i z>GDNPq$z44q%LM(R}jVdEdumbt)P!RdwL{9#lKh|6+hb=6+e3(ocuPByGlp%L@!fC z!3cgAY*{MFKW?RmjOo5PFs616)Ca6(%r?_wJX%hRI1XRDIC^|8FSm2+HxS(;nheat zbwRJl?Hr3}$mNU1t^+1w;ZTIG830Z`yTa-o=R$ zTC=U-wH~1_U@lmJnh6*s6Ipk zs;mKzT155Y^dvw4KIL-%D7D@on6YO?O}2P z(b!J*(~qD5#)!E*;6MFOA0i5~r{ol&sY|qt8%`C~v#7W%@tOF@$8*emSOG6kw#YIq z{|3rFVR8DQvcgL>rRlgC5Ys?sXgSJimcjmxh9^@`sO065^&!3g)nf z@1`uwgT?0$H&dlHGLKP4Ua6T*8tJ6$rbbaF(>M7^*+jwB+xgl)lb!pA&(7-7t&IQ> zH@lT-DIIdlG%Nl|;4-a_n|90N%3s*nGx>{Q@sCUp_jS+rvZdU1B+{z|c13iJ5&))7 zsjapV$L3G7+Q-IpJdNxs4kX2iqPAg@N^buaf{00g^JmqP_#6xeN+n8;gO;)w-^AZ zU!M35U;8MR*L12wdV;frlOV5*K$7B~?r=INs}G{wQalJ_A}atHU{%{}u89W)a}Pm? zD;UBAA#6JR!lte$=Mos^5(wpy?T_-<-yb+E*R53I%Uo zR!N<@nmDyCS8iuSTVb5)3ZoTnbx6ATFx~?JO(*t1on$r8>-jue&SXTnVxVnZEXyYv zBY&F+dFA_LF02Cst@NQaH4HVC_dDurRECSIRK%e?C}wjKt_uy(a67&2Zf~jMiW8iQr8Jj3U4@qH8U*Y{rg&Z9hZMZ^a5f2y3#;;oO&`f$+uAYXP>C$*`fn58YMo9s+aN^Yf??yXMh!$?;uPQEDa#z_fVqAVmO z7>mY0S)@);3;1*tEx0PNm#ov)LbUO^{mD2Fn~W$oy`&&D<-j2WMmHgrq(vmd&_S>W zq@y=e{3=+jSSI-F5%Lco9^X-Rp}bl_RnR=LL|QACCtZ>`lK3`JNO{8%yx}LUp3n*x z{3$SaD5ts}eul45V=oDS4*!%sME;{;gP`DhggN3-O1o`_+I}N;2Q)3tREidD9B@d`@ za9R9>mV-dnIATMns;b!*Lu}f?{A@G#a$AQ$y_lGRJGh{|Ya95WigpY}^m(JTgiR#D zwN83TNn!J0w`=tMBM}ea#U8!vk|_w~s!*OLn;K7#hI{MDVh#=s>Gjge9~co5&6a}X z5*;WjX>_|={lg^>p2C%|+YSXgB}ed!8%0~MBFQsDsLfTiJ1Xw3$5<1Y7ey6`bOQq5 zhqU{E?Dkh|&QaDxWwYd6h)^?W_S59As*wxBcgY&}xJ?Ju|GDts8r^t@U&X3?f?-5r z9me5Q?dX!W--)hnR*vDTPH9tA^C-ro2~1LZ zNQ|M4^edQDsh{~?lmpJSUdDNbDogQ0Ftw?v+56&NcR!&PB8@5T_wIKb45Qva;?vb6 zT_C^h{>;NQ_n_)iU@}d|*S^>-JbfO51-EN$tS``_31UBa^;w+vCm1}~BA=A1vJN~+ zTdw)gG)eZ{NGtc)%s=C!f$C6-e*3$tw?xZwd2#;mhvOGlXUD;l;3`Zo+d5D*(@U+G z#N=152zwC5!z4!X=2C?A08??XWNn{#!Ko_3~=>v<-6VAycQUNrDWBkMWSc!?zV=Cmc5*c)QuA#8lwU4LsN zSig=zy_qXIeWIzJ3+l$q<$g+VPoVT3Cz0G)@&~z35ML|0eG!rFjeO&`KRbC2Vr%_L zqV`GD>wXi_5v%(@Wt*1=HE2!;EB5bm5XOu&bBymp*L9INDvpZEhnP1u7?iFmEb zIMr)(t@?O#rj$|YuSe3N2SCc<(n=mk(m<+?Qgt9s?~%`ATSI28+(=+B|WtgGFXlao89iDU{E>0I!{V4Y^-^M`P? zj2KN=b1Z)rVZUNXt!yo+Z&)E?##aQBIGs@d)6&DR&j0d^M2)X5+Lg}Pb@feSG520B zaDrp3!4Al$7Gv?ny+LCL1IJJSDAn7Sz+>othw`KyQYpB4kQ7)#!!hLN&|}(!4x-%- z_@RiXP0isd2U>4wDVa0W}<#xIPRun1I2hhpT^QPl#nL1gF==u?_p#g2(_*qATC_U z(|pJmJQ=2=;%*ELri&@d2nW5XDZd|#+C$LT3tuQQnoiMxzq|q%@~QGza89ahlr|yI zP0M{i3_WzpdO!>-R2{j{`cT0ptWAGJ-oXv4YNKyW7ePewl2`3J*WVl;u&8w%&Bo~5 zYs~~kO2$@WvL<`nN^U9^INJ!T3+cUX?F{p>#d#~3f}xDuWYjKb2M?K%YY6v>-Abo@ z%{8%Nt^Z)jtlA`)jQXuQS> z6j$@E^2bpAhszL{{{ovRus<<~Dj8rxT5`05Xx$$I(#Jrv)8P4qatX1S5!aMK(t*+( zX+9)|3^gx075vsCS%!~BuOsrHgaj>?sLADq>4f^1+gg>q0S7+naO)a!_Mvuu`OWzm zc)WwJkFJh_v!gfPoWDJi>Bos(d}C!%a=jAlNxjbI-5OW978Tp4bD6S;Q3d6Mc-Fxh55z%^@{jrTm?=tE s(p@IqMHok20RB5Pm;e9( From a5f6edfa6c2edf9ecff17e5781315fff20d024d0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 09:30:03 +0200 Subject: [PATCH 202/287] GetCSV refactoring - changed to mirror the original model class --- .../common/collection/models/CSVProject.java | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java index 19606a09e..0d939ca0e 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -13,23 +13,12 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "id") private String id; - @CsvBindByName(column = "status") - private String status; - @CsvBindByName(column = "programme") private String programme; @CsvBindByName(column = "topics") private String topics; - @CsvBindByName(column = "title") - private String title; - - @CsvBindByName(column = "call") - private String call; - - @CsvBindByName(column = "subjects") - private String subjects; public String getId() { return id; @@ -39,13 +28,7 @@ public class CSVProject implements Serializable { this.id = id; } - public String getStatus() { - return status; - } - public void setStatus(String status) { - this.status = status; - } public String getProgramme() { return programme; @@ -63,28 +46,6 @@ public class CSVProject implements Serializable { this.topics = topics; } - public String getTitle() { - return title; - } - public void setTitle(String title) { - this.title = title; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } } From f3d575f749043ecf12f8b44ea18722dd684d39ef Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:03:57 +0200 Subject: [PATCH 203/287] GetCSV refactoring - changed due to changes in input resource --- .../dhp/common/collection/GetCSVTest.java | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index bf5e3dedb..292fa4666 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -26,15 +26,6 @@ public class GetCSVTest { private static LocalFileSystem fs; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(GetCSVTest.class.getSimpleName()) - .toString(); - - fs = FileSystem.getLocal(new Configuration()); - } - @Disabled @Test void getProgrammeFileTest() throws Exception { @@ -42,11 +33,11 @@ public class GetCSVTest { String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); @@ -57,39 +48,39 @@ public class GetCSVTest { if (count == 0) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); Assertions.assertTrue(csvp.getShortTitle().equals("")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } if (count == 28) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); Assertions.assertTrue(csvp.getLanguage().equals("de")); } if (count == 229) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } Assertions.assertTrue(csvp.getCode() != null); @@ -100,6 +91,15 @@ public class GetCSVTest { Assertions.assertEquals(767, count); } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(GetCSVTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + @Disabled @Test void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { @@ -217,29 +217,29 @@ public class GetCSVTest { while ((line = in.readLine()) != null) { DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); if (count == 0) { - Assertions.assertTrue(doaj.getIssn().equals("0001-3765")); - Assertions.assertTrue(doaj.getEissn().equals("1678-2690")); - Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências")); + Assertions.assertEquals("0001-3765", doaj.getIssn()); + Assertions.assertEquals("1678-2690", doaj.getEissn()); + Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); } - if (count == 7902) { - - Assertions.assertTrue(doaj.getIssn().equals("")); - Assertions.assertTrue(doaj.getEissn().equals("2055-7159")); - Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports")); + if (count == 7904) { + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("2055-7159", doaj.getEissn()); + Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); } - if (count == 16703) { + if (count == 16707) { - Assertions.assertTrue(doaj.getIssn().equals("")); - Assertions.assertTrue(doaj.getEissn().equals("2788-6298")); + Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("2788-6298",doaj.getEissn()); Assertions - .assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa")); + .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); } count += 1; } - Assertions.assertEquals(16709, count); + Assertions.assertEquals(16713, count); } } From 58f241f4a2abc0aba95b56248d2bd59b2507b324 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:04:44 +0200 Subject: [PATCH 204/287] GetCSV refactoring - changed due to change of input resource --- .../test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index 292fa4666..d24083e0d 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; import java.nio.file.Files; +import jdk.nashorn.internal.ir.annotations.Ignore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; From 5cd57145303bbf65ecdfbde572f874ffe28a54a8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:06:49 +0200 Subject: [PATCH 205/287] GetCSV refactoring - added ignore annotation for fields not in input csv --- .../dhp/actionmanager/project/utils/model/CSVProgramme.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index ea89a75eb..df06fd6b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; import com.opencsv.bean.CsvBindByName; +import com.opencsv.bean.CsvIgnore; /** * The model for the programme csv file @@ -22,10 +23,10 @@ public class CSVProgramme implements Serializable { @CsvBindByName(column = "language") private String language; - @CsvBindByName(column = "classification") + @CsvIgnore private String classification; - @CsvBindByName(column = "classification_short") + @CsvIgnore private String classification_short; public String getClassification_short() { From 5f674efb0c36201c797ef282b1b2b66724ebb3c3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:07:53 +0200 Subject: [PATCH 206/287] moved dependency version in external pom --- dhp-workflows/dhp-enrichment/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 6f638b188..644ac2140 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -47,7 +47,6 @@ io.github.classgraph classgraph - 4.8.71 From eaf077fc34bf43f8b2b67ef5a9a701fd78a1eadb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:08:58 +0200 Subject: [PATCH 207/287] GetCSV refactoring - removed not needed dependency --- dhp-workflows/dhp-graph-mapper/pom.xml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 6c5e4609a..17146903a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -122,10 +122,7 @@ org.json4s json4s-jackson_2.11 - - com.opencsv - opencsv - + From 964a46ca210afa179b2a1735f603653f99a463dd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:11:18 +0200 Subject: [PATCH 208/287] GetCSV refactoring - modified due to movement of classes --- .../dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 71b20b356..26b1d5993 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -100,7 +100,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + eu.dnetlib.dhp.common.collection.GetCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -112,7 +112,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + eu.dnetlib.dhp.common.collection.GetCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj From 01db1f8bc403f1456d7004489af2291282721278 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:14:17 +0200 Subject: [PATCH 209/287] GetCSV refactoring - removed not needed import --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 2ed76a72a..5b00e9b6f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} From dfd1e53c6982326338abca5b5a59f0d1e2b58d61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:15:12 +0200 Subject: [PATCH 210/287] added external dependency for version --- pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d31ffb88d..a5e9f0547 100644 --- a/pom.xml +++ b/pom.xml @@ -524,7 +524,11 @@ opencsv 5.5 - + + io.github.classgraph + classgraph + 4.8.71 + From 32fd75691f17af421c8f5e7c4a8b4a0796c4cb9c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:15:42 +0200 Subject: [PATCH 211/287] refactoring --- .../project/utils/CSVParser.java | 47 ---- .../project/utils/CSVProject.java | 200 ------------------ .../actionmanager/project/CSVParserTest.java | 31 --- .../project/preparedProgramme_whole.json.gz | Bin 15986 -> 0 bytes .../dhp/oa/graph/hostedbymap/GetCSV.java | 74 ++----- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 112 ---------- 6 files changed, 21 insertions(+), 443 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz delete mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java deleted file mode 100644 index c53cd2127..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVRecord; -import org.apache.commons.lang.reflect.FieldUtils; - -/** - * Reads a generic csv and maps it into classes that mirror its schema - */ -public class CSVParser { - - public List parse(String csvFile, String classForName) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - return parse(csvFile, classForName, ';'); - } - - public List parse(String csvFile, String classForName, char delimiter) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - final CSVFormat format = CSVFormat.EXCEL - .withHeader() - .withDelimiter(delimiter) - .withQuote('"') - .withTrim(); - List ret = new ArrayList<>(); - final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format); - final Set headers = parser.getHeaderMap().keySet(); - Class clazz = Class.forName(classForName); - for (CSVRecord csvRecord : parser.getRecords()) { - - @SuppressWarnings("unchecked") - final R cc = (R) clazz.newInstance(); - for (String header : headers) { - FieldUtils.writeField(cc, header, csvRecord.get(header), true); - - } - ret.add(cc); - } - - return ret; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java deleted file mode 100644 index 268d5f28c..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java +++ /dev/null @@ -1,200 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.Serializable; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - private String rcn; - private String id; - private String acronym; - private String status; - private String programme; - private String topics; - private String frameworkProgramme; - private String title; - private String startDate; - private String endDate; - private String projectUrl; - private String objective; - private String totalCost; - private String ecMaxContribution; - private String call; - private String fundingScheme; - private String coordinator; - private String coordinatorCountry; - private String participants; - private String participantCountries; - private String subjects; - - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - - public String getFrameworkProgramme() { - return frameworkProgramme; - } - - public void setFrameworkProgramme(String frameworkProgramme) { - this.frameworkProgramme = frameworkProgramme; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getStartDate() { - return startDate; - } - - public void setStartDate(String startDate) { - this.startDate = startDate; - } - - public String getEndDate() { - return endDate; - } - - public void setEndDate(String endDate) { - this.endDate = endDate; - } - - public String getProjectUrl() { - return projectUrl; - } - - public void setProjectUrl(String projectUrl) { - this.projectUrl = projectUrl; - } - - public String getObjective() { - return objective; - } - - public void setObjective(String objective) { - this.objective = objective; - } - - public String getTotalCost() { - return totalCost; - } - - public void setTotalCost(String totalCost) { - this.totalCost = totalCost; - } - - public String getEcMaxContribution() { - return ecMaxContribution; - } - - public void setEcMaxContribution(String ecMaxContribution) { - this.ecMaxContribution = ecMaxContribution; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } - - public String getFundingScheme() { - return fundingScheme; - } - - public void setFundingScheme(String fundingScheme) { - this.fundingScheme = fundingScheme; - } - - public String getCoordinator() { - return coordinator; - } - - public void setCoordinator(String coordinator) { - this.coordinator = coordinator; - } - - public String getCoordinatorCountry() { - return coordinatorCountry; - } - - public void setCoordinatorCountry(String coordinatorCountry) { - this.coordinatorCountry = coordinatorCountry; - } - - public String getParticipants() { - return participants; - } - - public void setParticipants(String participants) { - this.participants = participants; - } - - public String getParticipantCountries() { - return participantCountries; - } - - public void setParticipantCountries(String participantCountries) { - this.participantCountries = participantCountries; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java deleted file mode 100644 index dd7e1910f..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ /dev/null @@ -1,31 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -class CSVParserTest { - - @Test - void readProgrammeTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme"); - - Assertions.assertEquals(24, pl.size()); - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz deleted file mode 100644 index 620e1abfbf28cf33bc6307fc9ada17ce4a048a71..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15986 zcmV-&K8?X2iwFpXn5JL=18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUF6l-E!MV zlJ5OJ1!69?JrShGq9n`hJ2p*?*<;HJQQnz}*oX}fERq<302@D2Yj!tcVqf80o%25X zBJ(6Cvwnc8ViiSz_#xfC=xB>1P+3`3S^4|rfBr7^!r#6B-P^a97iVvN*ZZ9p%?bSd zaqwpFra%7r`uz39>r40#zDoTx!0#saADp2dFJgC1;NxT!#pyl&4*m|@aGAMF{ELM6 z#=uPyf8l#>>PI-%H1d3sx`E@Z+#n#~k|fT5In%Wpr#e7TFbw=|^l3c2nf~do1NgT_i8|7MP{QQ#M)G)jz4O2S)9WVCRr04iB;1Y&TV|M|BJ^10$aba>PiQ_2tlQsM#^nz@TUu5BPR@=Rb43 zhXDT&zfT>1PTYikIU|8j7DIMQbm#8H<5 zv~b2*9Bl~w*iRG3^HcxtB*gc1z=={@U~9e|kgV*m^S5V~BY39nvCl3e#*aOq4UWwt zS@~W-*t;J|l8^wFA|P%^V*1%H8O#y>{RES8{PiY|9*IN3M?a3jH3^Y78xk*~c=FMFj$s)23H$2NpT%r1wSNBZND^g0kwb7zd>|dyhku=}+!W|eHn3+C z_)D20JbX-it1PX)5e_Ij|LpwAu=CGg=by{_{*L$|jO5`InA~wgoGo!?jK**`*{HLw>GtnxU9Wn0 zt1##C{NmhlBfM2_ggbWR@W8_fg?}Rfz;w8D;ErBo1XciOaFXXFC2Noj5nMq|mJnPg zNq)GSO~A=ddd}0z4+vgzG2C19Dq9ghb^?C^7v`GaTLSWk5(9n-bMagpbG-q90rDb+ z52El9=FPSMXcDV13ZkXIy-3Yp9M_|d1LsvS(`fcL)NDZH{hltz$?>o91@et?LOn3& zCTTLD3e=T)nS9QI)bG!IIN3mH=*G`Ta~!$e3RnhW*GX_|iHY`{B-`N3V4|~UXO7fH z8@vG%ocIa^Uy}MBe3AIekV-&z{)l(t+<~hxTex_@>C_{T;ckFeBHXr%ehj*x3&Mdt z;%-uR2E0-RcUJBrKAJ?E6&;3dZz^Hoz5Mo$77IcLK*j~+S6HmjV;u9Temiq8e*2dQ^c`cL2y49Pwwpe8Jz@uNpS!Do<= zkfcvhjEY8;V?GJz8OTE)))~|g)T&qh1~%rW;t$|A34u#O)%J@=+#}(N+A4HL{JQKp%-E;jr*H?s zzg3$arU@6SSTM{8WF>xmmkErwWU- z)ju#PLMdBhFxmz-04PI142<~XQ{wT2)xGU{%oV_Tkg>&B~&>|s)?U6V<-!8d1jLTid?z>ICjQg z@4noQhd0jTW_mxl|9U@mzFfl}H(&m3ct81a;|y;;IQQex$D1#ozWnj8&MVw3`jX`C z!$}B=VVZ$m4Vz^WhS6i85a%lJbN-SS7v~TPZT_-Kv?_jpi#PAA(M)BRPlg|EoohEq zox3QFm{+{xsHT~sdZL@AcPo2xaB05p&gg&g&IE;rs-R#Rf$BqK?TTAH_|KRu@d&Wf zu_kjkpka++9YVD@YFsWS3a=x-L7(Nj(lU+82dWu@ zR1^5pqdxe{` z6WA!YE+IW6e~#i{4(9L#4@3fk_VPOdh{PRrM-~BdZ97xRM3g?3&1x?TfFUsS3KXc+ zxA$?Y(Uq{LHQd?VMpI@+#kz?SKg?-1)6!mx+!_DV2|8s zikwnv&^X{fm(Br3vwUP(2ok?XiJy}V3Fky4PDqmlg;%*zA1=Q&n(cRBqlC zr=~hC-kOg0fNrpJgI|ruBXg%8h>bNmwAnI*ZW!fL+`7}I`#sa{F1(S4XCGCYYK!~D zK=WR_RRz1nwXr2!?qi?^qa5+l4gExfsW-6;LJc&s9_+06(HAijn%06u(flemCQe9D zZ^O$51K^n-piE5s0NoE!?Oe#-Y?y>R+&_ZW^M8I&5*0?jLdkmxfOqzNr-s1#&@*usfi4b(;X4HF?1W_He3UL&zx72+q2)l2Ma4n`-MajJ`*}{8UqO(IBlmZ ze~}LETt9Gr{z_GlgJ)1Zx?MLdT9n*oY-;z}=R7bgC55r0fW+phY1^^#y`MXZi22^s zb{{w2lT_8jG}bGe(<;e-389 zp)ud%JW1-#eUdmLd9YAWQ-=8T$fVJe8_!`1&XNiNAs34&fjzK_qingt@A`^wrvgt( z9s7LdG@C~*d>Xbwt{Q_rv&W#{DVdC;FHB&evzj>05bUCEW6&0gYKcJ`->o`{-um$?XM!5yc=qP=ysWY`Cv~$uQ~lS?Ki4c1AZP zI+|$SEIUI<;e@2uf}f;dAG45aK;x}$uGDNYI5%FU_2acof1ccbq$v~EU+x^H5C0YK zt)ri@ZGUhIX(0#KlDHe_`2^;rB`2Gs=vS~m)zDE)Blygl@k5fkMm%aqd9j~xAu{(9 zHv#&NxRSf#Yz|H3tu(AwS88%LoYAXVy{yw12!TZp#g(_p))=*MH-5@eL@jlfm6ok! z9q4t0a}$(FSCo>&Y9mnWXFukXib-MNA2#tui%FX!T=PnpLz3tE9vRd8aKkFVkgmrM z(e(3pYe$rMu(Ap-Sv8Z8zKsUqh=&i0q7zrB-pbgfyJc`@p)l2l!6Sjq)`NB8F8#uP|9axW8a+j^_maBg+{pPSc?*Ytj93 z+sc$Q5n;OY2Yl~s+kAY~2a@*ZG3p`Gcv}JFy&h>?EbK={Ljqx+!PPQfV}$>l7Ww|D z zwwmE+WMEuI>;9Q$<+8#o_tEtOcjo8XdX|u#+6#QQal`*01^z`qs5hsQ;>BpAyqNnj zXvB`N3-Z#jS}C-rxVK4%24rP3VPr{GrwOqo^R$;AiKTO>$)edF#z3c5**CfQ@O65B zHyM6%K8=ST#&^??lUtZXd1t52*#QP2&aI!YdjaAbqP(Y=TKlUI37Xa^majSnde9Wc$=8&*RAiYT{o?X z`Z_dK`=r$!J=mcFnal%fndvd7+&pCWy|foN4o*lju~IPbkV=Y{)7ZwZk-$UFM{Q)N z#>1sKf_J7vc@DwMT>D-e&2YbQS6W2Pz-vMpcM$qI4LeZr z&NgdLf?A=nA3s|8D8?jJzo)9JNbn|(7C0JxkY-N8IRQx*JLxm(u?o}{{~k#p^E`*M z%AP$IB&P2>pr##g>{v_R7rK^(R;~RrTfWNqmYiLK<;==MBwsa0P>~%F?o*LSBNrrt zoSP0B?1u_nJ-B?E%G)ESN=p-2{=y;3mJOu}D~vp`&8b&#>$nRD@?Q-+%BI zzNJQQeN9O-NtVay3`|6AB?|jI&sLN&6`pY9oy=TzCQfo$YRcQ0{>B)m8hmffW#DIU zdQ~@4o6X7lu>0rGvUr6_dtb`1X6wm)pXigiIDSp z&wqSpE^&CPd;f|UHiPPZ^jS?LBJgozhZ#ZwU_dfcnR$ah8s81;TgJNUyYgSji5+#$ zyXn%Yh9{uqmYO~DmpTyoXW}LzcI=|Ul%R}RqMl+)&^)@EvVru{YHaB;!?X&;+LkfP z8r(8U$q36K%CEA#gvl$YoBK$M{=`{J*gtWte(@M=}1FT7Tbi@!1FSleqk9tM_*~x~Wgcv_`o07m6$|J09zi|4MvT|3 z;?SEc=CJ>SOA)jtpPe;~Koh$>dK=DI?F;q$(hoQw6s_MkAntvhj)u${M@-tka zVAenU@Bc*e=qY({`=bm$8RqfY`lv4KL4z$KoN+Gb~Mtlwt zhtB5!3QFq+j#4D~6bme7_9C-|HLd5Jor6s^&b5wlsp^=7>gcuLR-NgcAHOQS2Uvat zjI;*!;sCiEX^P$f0nRZgu4Gz-|lC*4X zbAWTJpX&(3zYlvXVSu40Q|D=?yYXCY+tBK5Y4l(*$aZA`@&GN(gYwf{95ASEQ@YW9 zk8}*-zLT~N5Y_h~Ohd)CwL4I>9aFB0zF?N$9Br~uCkaA2_ifo=8mQ+zYq`yBd|Xe> zGRYy8WgDf4Xi5abPM}5*O>&H9pfzt`iNgHCrb*Msn>1#Gt8Sk=ouG2JJx$GNWJX?_ znG(g9*uo)ph=ATw0lR0;b!?q_WtX=RU3@QxnZEe) z7fm|qD!bG4kf%+JH#eKcB@D^0sUsQ-!YS4wTlnXk6fGj~Z;LbY=8!ja9?z_cCEbQC z4_E_rCmEX)vfACZN|rj70r&Hlu|$a|^gUkBw~A8w*L&wr#6nH>C!)BW&#wmcdI14EhVq#1JH%dVBi%5%LxnR zPV`mjujWgrkp%T7ut<)}EieoUB3X3eC34N8ykS(&MCfuff&wyt9^4*o-@F|)^K*DM zYkBAB)we(`WtgpVrlxAk-8@x~v_)GO=-IfuK>_Z^O&rZL_IfP`!($tkszn|rj~zT< zz>2lqT0sLO>3K{$JaQnT@Gj&H#R4s=1c#{kNQ%%63Y#eOmEL@d1jQURd8#Jdag_Spaxsu1l< zQNDIivluK0E9bg4_!C2Z!zRf|B042+CA;QL)wxkh)77cEs#mpUNsT(+URAVtyJ!9h z!yeoezB)Ia!9T$ojzRlKX(m8UzpO@PR9ZE|kZR?-`%f+1#6Gncnjl|=pno&4cqK38 zyjFp^-oDFG9{O<1czyDA{C)LS8h*HiMJ8SQ%T+3_2Y2pnQog5En?dL1{*QI(irAmZntTZR4yfg&m_1G!c*mWN=#)xc z@O0&-XtZsJM|1sjhmf*(2feA`Lv#>rl@HeeqsmM9W?j8L+lnGFMNBj|Y+qna z$J5~*Yk6)VsCrBF7hbI%086>u#g5WkTn0VW2Hkc!4GdbsWefJ?K2yb*_2c0;b+U5f zJQMyrFe6|G+BXk@Smb0Qe~NKU5|YA5N9S3~U>U2~G!^4~P& zOUXgPBXaFxI1>NjA&6_ESSs<{5mpqUu-JbB$5wn;hI*~lv`Hb~a-28S%Gsa+F9_b*$FFtrt27&InMe&=!HjUCI>0UX zCPU9-Z33LwPad2FLdb2W30Z$t*>teJv7|0BPVZ16YUeDDe-&+<66@4Q?VK>5| zT7Krdx}9F{)%SRMdj~5px%s0b;-sT7trPz9%MDE~wbN!3 zM;nmta|ZB`qp4Dfh)6s%Er=H5`WvQ9|CenDJ}<3W)~vB=m^ylZ`Z&V)wZ+@DO?GAG z$(7m&dK|83wgm|HIRd97MG=GE{*4yyiw1Hs^|fW>MR!)k2R}E@%UX9#}5%gKEll zT-ArZ_jFx%ht2y0D+~1`2pc+$qDNK=t?T=(>PwC>7ozn%1wE=~Zr^gDqvnhm7Gu>| zS0&fmaRlQ;4$wi9?<$;E1M%uHKeq)0&DFrFbP2c6=b30p^@#N7%fRv64U=v89tOb< zGRa*ESciW58RVbP{D6(tlMNC6EpNPCH0x_&?$rW?$2AMpS7kvtd#fkYP19`tTnIYs zT#&k09k>QF3NJ!*CZ<#!5UApL98|Par=Aq%DQ8Cr=?pl|N8w39uTe}kctXOId(gq& z%wIztPDN3R7X<;U#7*Fqh9d=><3%no-3+}nX(S4~SqpCZZ2=9!;<0QzbjHI)zoNf| zC*Wg21JBmjz-g`T7}SeFb7#U{=@+aM%SIWHFQ!;JJeA*CEu390h++Zw#T#$!LU@~+-f4KKbG*SOIMH#-w7pob zm95&UjTIl*-pw+)aY&XSuixf;t0p zaABag^N{^uGd}_z`{6gy1P`Gw82-g14}3X4N0{&_oqj2J^XjLcCjFo9C%@OcgF07Q zbW%1n2QBx;(f)@A&6aneQd%v!D zRRftkb)ptaV@N2)+}(NQoFvZ7z{K_(4G7h-3GuNZqZEf3+YRs15+Re>&Sv zXyQjX z6zV&(%n#@#4S%5`9|Zmq=g7kcOx)=k>g6{#9GR(gYSqFjd+37nKq4s%bwasQ>bFNpZvc!@i`ZZ-8*jaid4$nKYRBp<5GzH8h zXWCkoaw)phQba=mT3(7M-?CcWT%~*^YZcy(1R(n}nyGExtXhxgX$p$t63$9kSg4|N z<}8Y_>0tp{e!O;b*=ZbV1eY66T^I{O!lhboCP4#XK21zkyhcGfh-B_8Sw^3dV&gr%9nykF?j8(!k=SGd6g=vhvs+An^!TX|=CXHQWsoaBr zCG&=990d*FY1El~Uuh!E=HFK!Nxf){m2{V{i76=(l^K^-2m2CeJ$@=_J26SJAf<^X zn`{>NUhv#sVA{$&cZFA4-rX&L8z8Y-uin}T(ehZ$ro_2=3|~%^)R$ndC%Vm0t*p82 z@oMdEwDqD=W9QY+1_rWFbwL@B4mv0ODRl^k86M);*|HN}hFZ4C=d4ULk98Na zqB^fyOxtL6W*X+u_5sfgedSIcp&IAUG8j3H=;Fx9rii7W6k*~I)J9*x7e=@^6HhKs zS+B9uy%5}dVQv%iMuiR!I=Gc%B zG!*xt$LmCK_UPW7lMM;yd73s%Bterf@UhiPH!Gsh$h}U%qOWepmEdFP8C6@ZMv2m* zil##}(5n$JHZ%SuLxXA|@RVqJHOtT9-!RN7dZh3Eb@JZ%;cJL?GyfVhvEmG=XQy7d z7>cOp4_oS`I_@E1Ni)b6k3F?Gzgn=tP^;Bgqsm8PwJut%N|x{S=p&qdZkR>0zp;o* zQYT8rK9=YXGOF31))%T)rv=^OQjZee(%nFuhcJ2y$efE3){=^XCoL8{Ry8Lct2&T+ zQ87OQ%ywD3JN*Rvn6d$DW@k+{^-fMSK)8m0XqLujG-@qd?Rg0jb3@Xqw**vTsknB_ z^4a&Hb}PQ7lmI-q`%%|o=~g+?G!L&S(sTKGsobgK5cWf0fpMl@_I(g zRU#wz%l_!(Ba(!pXPn(nfeBnwtZi-MFV>a`p+`iYh2t|R8*?|}>g<*1;j2VccR zutJF=v-b97=2kh5^f@Q54nj1=&X&IU&e)NgB2k&9O-J zz^FmnxiL0v(aAedpDGvY)t*nCA8OU_nYHDlnvgNM{{XC6qSvH-d8af(R0yf3zoPU? zt>oNRe(_cnGq*HNlfp5aRFOH;=B=bRT!6lFbPX-dmIq>nQNkC7Pwq2wKjLM|F8*IKR9#cv z2TZL}0&7RQ!+AE8NZVyA7d?^~E^>VN%#C9oOmv5KFZ8JpNU<9tzoRjy|N9d0RX) zoQ1qZoLh8{zANWMC#dm-ej4R)|5y?p1hqzno5|=yE2_S!HkBM$CzYDMY6BG4@SJ%| z9BYgcwmkMMKqX`Q zHhK0|zKhQ4(`n0NA-eG-K;yWou(XudZvojdz!C;!<1WJ}@pB=}{_18Cwjp*L0j{!} z*18aV_8Lyz>KA5b(3uL`vhOk<_vUYWmuougkG4E8Eg^hJ@~F+K(DUGI&Bqj>SUo_k zb8=_g_?RQ@jRni~LM~fxgR9qPiX8d^VzWlL{f3M-L1p7s8XHIz=th`oCeYQM=D8w= zjf`jl2*9vG4z&cDE-PH1 z>F+!(S_D%mmQBDX#{Uq%!GNb*ge?WEpU4Ny67K)wzvnL7A~Cw>gzlOI&+@n)tvMIX z>=+FZo(Jsy5=R^05!c8=QW4R1cQ*Ss$DnHY@81|3wBj&Z_KntFZX#h}7llDc9#*JR zP?|^A52Re!-zeU4bp{;`@N(5DXmFA?M4=bW@|V-ux6%c~r;R1hmt7xDkyeRpj=r7i zDjj zAu}hNAbQSW(4q~;Q^f-@8QpW4P5SM({FTykl|ZAl$uDvWr_DSl<*zw(whj0A zZIE`&-mvvE)Ovt@CJ2WYEx{lJ;exFlK`tizwx_KX-FVJFp!EEbxm1vHAiXWqvW1Hm z0>x|wLf5_*M}4MByJ5uY0;3RT#0v3JH(nB!tmTJ3!bap$g5yMpWo1kfg@Qi+(TV^8 z5dySFSt65J4socIX701ZG{51j754eMS+k0A#;a=&(qOx%gC&1vwe1B9}Lik|NWn69z7)wZhw^F zC&L_1a6bx5m{fGg{`4WRjcA^rmRezsBM!D3)a5{81q6~>H^g9pmQghSwo=?J!#Dy* zsRUD6Ohs>I3aV@QM>9TMy zEwK}9vgH}p1vsfPM_pb^2JO}YGnJi)dREwvu-)oaDv#ji3)SK-9JasUWTmiJxL_{L zjiJ9~rJ~QE@sl(+Ys@_A>LP4vMw-fMx);y3`r0xKBB~#AO&Dg*LBVI}TcjO5s2Q1u zhmA_?DF0=1>T6YMYgcHc$!cDF8}6jWcQyOOl}}bL#{lgY77uO&Z2v*lev+^d6}vii zXA-$(-*pVG{2E!)xljL))lhm^4bIJ2HrT^TO))n0Svr3YTQ*7agatP29SM9Yl0N#& zl{0eT7~v(VM#tLsQ0&z?>aZC#+tun2+Ej63?96*c%fVG(enNAAJf&`w+x1$Uy6tGO zku(cExhsOZ4&#sxiW?~^!m5^c?l;{=!^Mr$ubRG=z71FA*7e@=3{4hGZuOyAE%mjb zw9rozer}JdD23XP=h>>#nUYuXpF5k=7XKys48-pmG zb02LwfdiBgDYuEK1?W;IP*xg`I@HItXf|Sks1KG zIWPWPH|jpXIl!(sc)ejr+X*oK4GOp$ute$wx5TeFR?l-9=4+ zNS-M5n(%<%MLeF$K4)PWxhO{5oT$HHS6CM>d0D!MN7IPbHLT&tWNX&kgQHz@pDp6} z&4ERrC)x;VS^V#S?f$+Nm2a=_T;0W04I9vW`CjTBY>MD1b1}_Wl`6Jjj%n5?g#Zej zRfI^&G^vN$Ltu`2MCbr4Rc~&@f;FH-tY{?%z?bukTzo1}WG*hZ;m8 z?0Rylzv|MSKdI<9xwDjdNfxe%^T*LFN|X1_9sVD)z3BKfafiZ~RoZS{=O`XB*<;NB ztUf1I%2}+%w|Z#|Q>K1MV1R(j;bjobumBQcW(X;l!epOeD=#rk9`A;1jmj5lU5pI= zz~3^2im%aF;jXL+sKI!Wu?57^`EwKpbEl6Xt02R|pq^|!45>eO5paN-sqCc^4=BE? zM&@5iEJ+iQkan2#$S0C?F%b<;&4z)X?5BHL?=9e15&TC&6zj&0eTk%MChP9Es@90L zk+@QTRN#wF@>+R^H8RhI#ItM_zxwH?N&n~jNsT~h-Q*=4H1kdnU`x}fMYC2Is!sBbB~fo^#JVlBg+334|RbEU@0eyvJ{LQ zM(87mNi6PI_#~Lqz7TvO&&&?z!%0sAkuH)51LCU_l&VEv(!3hQ-Waxq+}!W0)#Peb zOCA2cCe5vh7Jauly);CRYah8<#KHuK6R^?KAxDnPL)8E%Oi6BZjsR1NUR7f;f|PF+ z3e^a+`Okm+ALmBivWEOu!}sl~tp);G)JC^P*P5ExfO9o9^E$$s3UM;z4i#z#iUOhS z)a}U@o0wE$?ol@_sW2lAlgxJ446LQSQg=}bdggj7LaRI6`8>u#54!AqxRF^0(t5kpW)1zjAmgiv3s>D18_2O1qVl$OOYKIMjO0E2`F`S71u7T8m ztEG%Cju{XK8U3bwmO)tc3sBViz?XCThzJY+1ECMQN~ zH^?3d={|;!Smc_9v_D*o~M=i{?RPtA+svQ}%cj37B6iH>4b-nz;W9bwaRK78E6gq^pQSCFKwp zx@Z_3`nGPZ9`!zKienubR;Vf*M!p&Qx&a&O5$N6DD4;^5+*qWP~Mw~ z9wge(MQDDagA?c)=&|DIUEPFVjg6(+pLva4vDzkHHT$iptpl}`MqtV<2}P4$yFrgh z@!V=eav0L|R_a`|sg99RUB~$pJ@xTQ>e0eY&y8nMm?!fwuULVVci_CLUGMEts1==? zG6>GL@R(D7odxWgSFr#=eudQ0?8Q-%^cMsy|3H%7IbO8B;brzC7=C4xg@MQWr?jXGJ5)bRxT2_q>-Ri9l$K9|uuy1J&c4}_Z z>gg?Q_+0z#w<0?oo~YRi2DH@-tiNd8*V2VQ(|!T8kdz7#Rw%#)t5z7myIauA!cFvq zmA=e<2{M}ex(OmbW&N)FB)2mc8QrJ+Q4Y&fF~Gp5Kt;*ik3sDc$a*NOyPJsZ&N)hQ zRaNqxt$T9CSj1ExK@1 zS8CJgfn^GJtiNSnG=NCZctbauwN>A|n`sVLE0J20Ut8?#%-+vOD@j?HN=@*0bZF6` zC>Ee8;>Ymq--sDYwd45L{#|XG=1S{rdYhe)7Pd8~6I|*}tLrSLmMg5?HceZBMdk_) z>4^L{MXa%hU;D8P@t82*FwcMzF-iU=_$jS!z$`JQS_v&l2B2VN4Ta%zWZSv*aPBEC zSW98v(_@X6XKr>2UFg?^BuB-N^Ef4+U?BIFd$4nc7}fJ#qOd5I3XE`>?P7_ zK|9+$h=NC0?bilD^(b&LkpI_)9NwH%Z zjbq-34@Xg$BEa@7h7+oJU2gASqfc)B=#2j|8l!u5G^Vw0f1Z38-{9xB!@K**XmUHe zpM1HYan_dSWLu$444acVu9@=1OTM1aoksH}KyHB@y%B0t%ap5Xx6wJG6~~ArxOYS; zVjonM{1zXtNlY;UR4kM0@%>i){@zbjQquQ+dkJlM&v$B>EM{?XZYdNfGDdoZUyI%G zN!ph$`Xmb6Sc9KB=2VtLC)PqP%V8v{ve$@yaG@4gj}t^I@!{@*o%GLt{J-%X#enhS zE3!`ANRc;m;&;Vi7V+PiMRzWi4* z(B0v)Z5p1h5wf*Tg(lO9ImFEUG)y6v9N8BRiyD8Rkdjn84VnLFYm?kneoY6SGe_#8 zQ3E5|_QI(lr&7eKyT*_gin~S&9U8hO*9HwM;lG2Fn0_9cnkScMvGiKs{#xz2w}CRA zTVV?H4i8c?0Z5GJ{sK;FI0w5q-(>DDnaeMZQ&-BrhC$>#h`V+JvJAVv2lS?eSz_Ok zz^A!wMdi8@@jjG$E#BL=ZO=|acCl%qm#VjYL@Q#vEKL~&Ay$p?3=>(LM3FwPbfR^Z=Z)lGP0|Nc9AJ{)Z1O?x?u9uYzG zCT1D=MGBA$tf_w3Qh(WZX_10iOrOOgp3I4DJgxOM>wx&9gEjkdpSZPIePhInQL* zYgL{ZGiX_34F!pNJerO_)oBT})g(g@eyj~I7=2ojmvpBxSK;|~56@2uWUk_<+n3-L57EKY zYLOq4{&q&a9a`k)I&hapm!JM+^D!WKw59?<#Tc6N-RvHEt1|4){ zvOL3sQZ*y#6Y4+tGuV3vWjb?bxXz5@zbmk78<5eFv5<1uVTEH;*@#z2_7(#oy%kW?@aO zqH*dj2<)49&X$y2|kDA7-w5Q6~lN~h! z>MK4w;%TaP?>lqS=wJsXro~>;H_)EXoUTD^6$D=J4iw_DsK&xL9KDkKX3bai2FAeUZrz*8n0XU#dPS??}`dM(SakaWhM&`|(}{ZjS;G3CyQzy=ko8TdnmXSkO&aF7x#n3* z0xL?A5YZ@k@Pi;xV7rT}(5FRB0V-p@0ML@IM}3VykETOj2vs~&HSY8>0(q?ysTGLa z$OjEK(w3g3T>)H=an|s*C6gPXp(;ADy&$7%QIf*{Nx~HDMdW1(I*MT{VSzmJA5&ih z^ZlTO$X{}7iBo$cEw;dz*3lyypOc0TV)Oba^5Uxkv$x{X7{xA=x>T+)N!5+eFGpZC zAzo$-Eyv63fVfoiVs_`h9or#&iP%<>0(G6*)=EkP{kS2&rp^-Uzu5sppKOLF@_mtq z%k(HLYA?Q4;UFcgOFo%i5M}e?nNw7_TjtNPYU8c4L2FfF6U$ex&)z8M{76RJjnFO8 z9V92S@jfJcBn>m)tZ7|nRpLN|2!+D5sg+Mw?xPz#L3@Q+rh4 zIjZE^eK3fCJ;!O@pWNVtEBlW3WsZi=Z_A?AP>H753~z9%l`|uYC1s1GjoitMjU7p} z;c>Y|Oi+Pq-{XG}DF4BeZ48P3K*}@>bekEa7*XSp_X5QC%Z^HpnuJ*0QP}68FgP0i zSvd3bnu2XdYt`3usm3+Hm6{a4s{GV2C~X?yr54mSR@|{s`$ZH>zSy_EoMnE1S35@# zcWHHrQc=q7Gkxn+7(5@Vb#+oIR133(i#|y45S(L-vhXN-%N*^dRXv0Edd845`62)>!mh2BZdOMuIA`2AV(s1uoxdyvInQx07o#4{9GdcCH zy)xS+S_4Jl!@{Ne8p5zY!2bW0#zBKZ4GZ)->CrR7*0lrHpER$02p= zOC~sW&;gS<3aPe87lcXA_n^uw!7OCK08G&pLYk~+WIpGnx}-=U5nb ztgfBo;E#$ye`d#^R}s_iKRDCc@o}pl&MSn}gD)?=QfrU7qld=Dc5D-@0&GM}!zXz^ zp-PI&t|HIfV^t{|-StKd3d!v9(24^q008M++6!0b)N+Wb@#WaijI=Fb!5%zU<;Pt# z10Fkb;<1i%%xA|g?FwV z37V{~C)D}U$VDdBdk1(xjq|y`^yvX)FX$EZwD`0~i#ZF0eZEfB(z>2hC^44Ad clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - .withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - writer.close(); in.close(); - if (shouldReplace) { + if (Boolean.TRUE.equals(shouldReplace)) { File f = new File("/tmp/DOAJ.csv"); f.delete(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java deleted file mode 100644 index 89259d814..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ /dev/null @@ -1,112 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.util.List; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; - -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; - -public class TestReadCSV { - - @Test - public void testCSVUnibi() throws FileNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); - - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); - - Assertions.assertEquals(36, beans.size()); - Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); - Assertions - .assertTrue( - beans - .stream() - .anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); - Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); - - } - - @Disabled - @Test - public void testCSVUrlUnibi() throws IOException { - - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> - - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - ); - } - - @Disabled - @Test - public void testCSVUrlDOAJ() throws IOException { - - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection - .setRequestProperty( - "User-Agent", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader( - new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - ); - } -} From dc8b05b39ef225c9898acc458b916331f239c25b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:18:25 +0200 Subject: [PATCH 212/287] Hosted By Map - changed the association with the datasource id for the hostedby element: there is no more the need to compute it. With the new HBM it is already the id in the graph --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 909e0f077..efd5d2497 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -231,9 +231,7 @@ object DoiBoostMappingUtil { var hb = new KeyValue if (item != null) { hb.setValue(item.officialname) - hb.setKey(generateDSId(item.id)) - //TODO replace with the one above as soon as the new HBM will be used - //hb.setKey(item.id) + hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) From 3359f73fcfb2ea24aec49681572ef8d606b22af6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:00:42 +0200 Subject: [PATCH 213/287] cleanup & best practices --- .../dnetlib/dhp/common/collection/GetCSV.java | 51 +++++------ .../dhp/common/collection/GetCSVTest.java | 56 ++++++------ .../common/collection/models/CSVProject.java | 5 -- .../project/utils/model/CSVProject.java | 5 -- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../oa/graph/hostedbymap/Aggregators.scala | 10 +-- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 85 +++++++++++++++++++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 75 ---------------- .../SparkApplyHostedByMapToDatasource.scala | 2 +- .../SparkApplyHostedByMapToResult.scala | 4 +- .../SparkPrepareHostedByInfoToApply.scala | 6 +- .../hostedbymap/SparkProduceHostedByMap.scala | 2 +- .../oa/graph/hostedbymap/model/DOAJModel.java | 1 + .../graph/hostedbymap/model/EntityInfo.java | 61 +++++++------ .../hostedbymap/model/UnibiGoldModel.java | 21 +++-- .../graph/hostedbymap/oozie_app/workflow.xml | 6 +- .../dhp/oa/graph/hostedbymap/TestApply.scala | 4 +- .../oa/graph/hostedbymap/TestPrepare.scala | 14 +-- .../hostedbymap/iteminfofromhostedbymap.json | 40 ++++----- .../hostedbymap/iteminfofromhostedbymap2.json | 40 ++++----- .../graph/hostedbymap/iteminfofrompublication | 4 +- .../hostedbymap/iteminfofrompublication2 | 4 +- .../oa/graph/hostedbymap/preparedInfo.json | 2 +- .../oa/graph/hostedbymap/preparedInfo2.json | 6 +- .../graph/hostedbymap/unibi_transformed.json | 58 ++++++------- 25 files changed, 281 insertions(+), 283 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java index 44f4121eb..44e19142c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -2,17 +2,9 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.List; -import java.util.Optional; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -20,17 +12,19 @@ import org.apache.hadoop.fs.Path; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - public class GetCSV { - public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, - String modelClass) throws IOException, ClassNotFoundException { - getCsv(fileSystem, reader, hdfsPath, modelClass, ','); + public static final char DEFAULT_DELIMITER = ','; + private GetCSV() { } public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER); + } + + public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath, String modelClass, char delimiter) throws IOException, ClassNotFoundException { Path hdfsWritePath = new Path(hdfsPath); @@ -40,26 +34,23 @@ public class GetCSV { } fsDataOutputStream = fileSystem.create(hdfsWritePath); - try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))){ + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) { - ObjectMapper mapper = new ObjectMapper(); + final ObjectMapper mapper = new ObjectMapper(); - new CsvToBeanBuilder(reader) - .withType(Class.forName(modelClass)) - .withSeparator(delimiter) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + @SuppressWarnings("unchecked") + final List lines = new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse(); + + for (Object line : lines) { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } } - - } } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index d24083e0d..c45f83828 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; import java.nio.file.Files; -import jdk.nashorn.internal.ir.annotations.Ignore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -20,6 +19,7 @@ import eu.dnetlib.dhp.common.collection.models.CSVProgramme; import eu.dnetlib.dhp.common.collection.models.CSVProject; import eu.dnetlib.dhp.common.collection.models.DOAJModel; import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; +import jdk.nashorn.internal.ir.annotations.Ignore; public class GetCSVTest { @@ -34,11 +34,11 @@ public class GetCSVTest { String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); @@ -49,39 +49,39 @@ public class GetCSVTest { if (count == 0) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); Assertions.assertTrue(csvp.getShortTitle().equals("")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } if (count == 28) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); Assertions.assertTrue(csvp.getLanguage().equals("de")); } if (count == 229) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } Assertions.assertTrue(csvp.getCode() != null); @@ -225,14 +225,14 @@ public class GetCSVTest { } if (count == 7904) { System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("", doaj.getIssn()); Assertions.assertEquals("2055-7159", doaj.getEissn()); Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); } if (count == 16707) { - Assertions.assertEquals("",doaj.getIssn()); - Assertions.assertEquals("2788-6298",doaj.getEissn()); + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2788-6298", doaj.getEissn()); Assertions .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java index 0d939ca0e..62c847907 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -19,7 +19,6 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "topics") private String topics; - public String getId() { return id; } @@ -28,8 +27,6 @@ public class CSVProject implements Serializable { this.id = id; } - - public String getProgramme() { return programme; } @@ -46,6 +43,4 @@ public class CSVProject implements Serializable { this.topics = topics; } - - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java index 73cea0539..3ddb19636 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -19,7 +19,6 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "topics") private String topics; - public String getId() { return id; } @@ -28,8 +27,6 @@ public class CSVProject implements Serializable { this.id = id; } - - public String getProgramme() { return programme; } @@ -46,6 +43,4 @@ public class CSVProject implements Serializable { this.topics = topics; } - - } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 68aacdc8b..680872126 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -92,7 +92,7 @@ public class PrepareH2020ProgrammeTest { Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count()); - //tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); + // tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 8198b197a..ce383292c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -78,11 +78,11 @@ object Aggregators { if(b2 == null){ return b1 } - if(!b1.getHb_id.equals("")){ - b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + if(!b1.getHostedById.equals("")){ + b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) return b1 } - b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) b2 } @@ -116,7 +116,7 @@ object Aggregators { if(b2 == null){ return b1 } - if(!b1.getHb_id.equals("")){ + if(!b1.getHostedById.equals("")){ return b1 } b2 @@ -131,7 +131,7 @@ object Aggregators { def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { val transformedData : Dataset[EntityInfo] = df - .groupByKey(_.getHb_id)(Encoders.STRING) + .groupByKey(_.getHostedById)(Encoders.STRING) .agg(Aggregators.datasourceToSingleIdAggregator) .map{ case (id:String , res: EntityInfo) => res diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java new file mode 100644 index 000000000..c8329c99c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -0,0 +1,85 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + final HttpConnector2 connector2 = new HttpConnector2(); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + final Path path = new Path(workingPath + "/replaced.csv"); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (FSDataOutputStream fos = fileSystem.create(path, true)) { + String line; + while ((line = in.readLine()) != null) { + fos.writeUTF(line.replace("\\\"", "\"")); + fos.writeUTF("\n"); + } + } + } + + try (InputStreamReader reader = new InputStreamReader(fileSystem.open(path))) { + GetCSV.getCsv(fileSystem, reader, outputFile, classForName, delimiter); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java deleted file mode 100644 index d7d369819..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.HttpConnector2; - -public class GetCSV { - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("workingPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final String delimiter = Optional - .ofNullable(parser.get("delimiter")) - .orElse(null); - final Boolean shouldReplace = Optional - .ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - char del = ';'; - if (delimiter != null) { - del = delimiter.charAt(0); - } - - HttpConnector2 connector2 = new HttpConnector2(); - - BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL))); - - if (Boolean.TRUE.equals(shouldReplace)) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/replaced.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - - in.close(); - in = new BufferedReader(new FileReader("/tmp/replaced.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - eu.dnetlib.dhp.common.collection.GetCSV.getCsv(fileSystem, in, hdfsPath, classForName, del); - - in.close(); - if (Boolean.TRUE.equals(shouldReplace)) { - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index ae1454b47..424567830 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -15,7 +15,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkApplyHostedByMapToDatasource { def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { - dats.joinWith(join, dats.col("id").equalTo(join.col("hb_id")), "left") + dats.joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") .map(t2 => { val d: Datasource = t2._1 if (t2._2 != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 533e439b7..8b0b45513 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -26,9 +26,9 @@ object SparkApplyHostedByMapToResult { val i = p.getInstance().asScala if (i.size == 1) { val inst: Instance = i(0) - inst.getHostedby.setKey(ei.getHb_id) + inst.getHostedby.setKey(ei.getHostedById) inst.getHostedby.setValue(ei.getName) - if (ei.getOpenaccess) { + if (ei.getOpenAccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 6db0ad33d..b7a7d352f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -64,13 +64,13 @@ object SparkPrepareHostedByInfoToApply { } def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { - Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journal_id").equalTo(hbm.col("journal_id")), "left") + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 if(t2._2 != null ){ val ds = t2._2 - res.setHb_id(ds.getId) - res.setOpenaccess(ds.getOpenaccess) + res.setHostedById(ds.getId) + res.setOpenAccess(ds.getOpenAccess) res.setName(ds.getName) } res diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index de41ebf28..d0c603e29 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -111,7 +111,7 @@ object SparkProduceHostedByMap { def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { - return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssn_l, true) + return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssnL, true) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java index ba804b939..4b5dc22a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -6,6 +6,7 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByName; public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") private String journalTitle; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java index 1facaa792..1c6c88fe7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -5,10 +5,25 @@ import java.io.Serializable; public class EntityInfo implements Serializable { private String id; - private String journal_id; + private String journalId; private String name; - private Boolean openaccess; - private String hb_id; + private Boolean openAccess; + private String hostedById; + + public static EntityInfo newInstance(String id, String journalId, String name) { + return newInstance(id, journalId, name, false); + + } + + public static EntityInfo newInstance(String id, String journalId, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journalId = journalId; + pi.name = name; + pi.openAccess = openaccess; + pi.hostedById = ""; + return pi; + } public String getId() { return id; @@ -18,12 +33,12 @@ public class EntityInfo implements Serializable { this.id = id; } - public String getJournal_id() { - return journal_id; + public String getJournalId() { + return journalId; } - public void setJournal_id(String journal_id) { - this.journal_id = journal_id; + public void setJournalId(String journalId) { + this.journalId = journalId; } public String getName() { @@ -34,35 +49,19 @@ public class EntityInfo implements Serializable { this.name = name; } - public Boolean getOpenaccess() { - return openaccess; + public Boolean getOpenAccess() { + return openAccess; } - public void setOpenaccess(Boolean openaccess) { - this.openaccess = openaccess; + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; } - public String getHb_id() { - return hb_id; + public String getHostedById() { + return hostedById; } - public void setHb_id(String hb_id) { - this.hb_id = hb_id; - } - - public static EntityInfo newInstance(String id, String j_id, String name) { - return newInstance(id, j_id, name, false); - - } - - public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess) { - EntityInfo pi = new EntityInfo(); - pi.id = id; - pi.journal_id = j_id; - pi.name = name; - pi.openaccess = openaccess; - pi.hb_id = ""; - return pi; - + public void setHostedById(String hostedById) { + this.hostedById = hostedById; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java index 0927a136b..8f24cd615 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -6,14 +6,15 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByName; public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") private String issn; @CsvBindByName(column = "ISSN_L") - private String issn_l; + private String issnL; @CsvBindByName(column = "TITLE") private String title; @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; + private String titleSource; public String getIssn() { return issn; @@ -23,8 +24,12 @@ public class UnibiGoldModel implements Serializable { this.issn = issn; } - public String getIssn_l() { - return issn_l; + public String getIssnL() { + return issnL; + } + + public void setIssnL(String issnL) { + this.issnL = issnL; } public String getTitle() { @@ -35,11 +40,11 @@ public class UnibiGoldModel implements Serializable { this.title = title; } - public String getTitle_source() { - return title_source; + public String getTitleSource() { + return titleSource; } - public void setTitle_source(String title_source) { - this.title_source = title_source; + public void setTitleSource(String titleSource) { + this.titleSource = titleSource; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 26b1d5993..d7b85b0cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -100,10 +100,11 @@ - eu.dnetlib.dhp.common.collection.GetCSV + eu.dnetlib.dhp.common.collection.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold + --outputFile${workingDir}/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel @@ -112,10 +113,11 @@ - eu.dnetlib.dhp.common.collection.GetCSV + eu.dnetlib.dhp.common.collection.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj + --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel --replacetrue diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index a48e52702..1bdcb60aa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -117,14 +117,14 @@ class TestApply extends java.io.Serializable{ val pb : Datasource = t2._1 val pa : Datasource = t2._2 assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")){ + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) - }else{ + } else { assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index efd598fef..a3a753a8a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -75,8 +75,8 @@ class TestPrepare extends java.io.Serializable{ assertEquals(2, ds.count) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("1728-5852")).first().getId) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("0001-396X")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId) spark.close() } @@ -109,10 +109,10 @@ class TestPrepare extends java.io.Serializable{ val ei:EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) - assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) - assertEquals("0001-396X", ei.getJournal_id) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) + assertEquals("0001-396X", ei.getJournalId) assertEquals("Academic Therapy", ei.getName) - assertTrue(!ei.getOpenaccess) + assertTrue(!ei.getOpenAccess) spark.close() } @@ -145,9 +145,9 @@ class TestPrepare extends java.io.Serializable{ val ei:EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) - assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) assertEquals("Academic Therapy", ei.getName) - assertTrue(ei.getOpenaccess) + assertTrue(ei.getOpenAccess) ds.foreach(e => println(mapper.writeValueAsString(e))) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json index dfb95816e..889daae47 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -1,20 +1,20 @@ -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} -{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"1434-0860","name":"Forschung im Ingenieurwesen","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} -{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} -{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} -{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} -{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"1434-0860","name":"Forschung im Ingenieurwesen","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json index 3d2f2e005..b84f4eb86 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -1,20 +1,20 @@ -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} -{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} -{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} -{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} -{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} -{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication index 6ee4f94be..e67c05cd0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -1,2 +1,2 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1728-5852","name":"","openaccess":false,"hb_id":""} -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1728-5852","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 index 59bd32d4b..e11311d0a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -1,2 +1,2 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"","openaccess":false,"hb_id":""} -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json index d9c201082..e037c1858 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -1 +1 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json index 64e2433ed..846be762a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -1,3 +1,3 @@ -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json index d4acba4a9..c4ac62ff5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -1,29 +1,29 @@ -{"issn":"2502-731X","issn_l":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","title_source":"ROAD"} -{"issn":"2502-7409","issn_l":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","title_source":"ROAD"} -{"issn":"2502-7433","issn_l":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","title_source":"ROAD"} -{"issn":"2502-745X","issn_l":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","title_source":"ROAD"} -{"issn":"2502-7549","issn_l":"2502-7549","title":"ELang journal (An English Education journal)","title_source":"ROAD"} -{"issn":"2423-3633","issn_l":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","title_source":"ROAD"} -{"issn":"2423-5563","issn_l":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","title_source":"ROAD"} -{"issn":"1735-434X","issn_l":"1735-434X","title":"Iranian journal of animal biosystematics.","title_source":"ROAD"} -{"issn":"2423-4435","issn_l":"2008-6113","title":"Majallah-i jangal-i Īrān.","title_source":"ROAD"} -{"issn":"2423-4575","issn_l":"2423-4575","title":"Ābziyān-i zinatī.","title_source":"ROAD"} -{"issn":"2423-4974","issn_l":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","title_source":"ROAD"} -{"issn":"2380-0607","issn_l":"2380-0607","title":"AIHM journal club.","title_source":"ROAD"} -{"issn":"1085-4568","issn_l":"1085-4568","title":"Frontiers.","title_source":"ROAD"} -{"issn":"2380-8845","issn_l":"2380-8845","title":"˜The œjournal of contemporary archival studies.","title_source":"ROAD"} -{"issn":"2381-1803","issn_l":"2381-1803","title":"International journal of complementary & alternative medicine.","title_source":"ROAD"} -{"issn":"2381-2478","issn_l":"2381-2478","title":"Palapala.","title_source":"ROAD"} -{"issn":"2382-5170","issn_l":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","title_source":"ROAD"} -{"issn":"2382-9737","issn_l":"2382-9737","title":"Majallah-i salāmat va bihdāsht","title_source":"ROAD"} -{"issn":"2382-977X","issn_l":"2382-977X","title":"UCT journal of research in science ,engineering and technology","title_source":"ROAD"} -{"issn":"2382-9974","issn_l":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","title_source":"ROAD"} -{"issn":"2227-4782","issn_l":"2227-4782","title":"Problemi endokrinnoï patologìï.","title_source":"ROAD"} -{"issn":"2685-0079","issn_l":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","title_source":"ROAD"} -{"issn":"2574-0075","issn_l":"2574-0075","title":"Hypermedia magazine.","title_source":"ROAD"} -{"issn":"2574-0296","issn_l":"2574-0296","title":"˜The œmuseum review.","title_source":"ROAD"} -{"issn":"2574-0334","issn_l":"2574-0334","title":"Bioactive compounds in health and disease.","title_source":"ROAD"} -{"issn":"2574-108X","issn_l":"2574-108X","title":"Journal of computer science integration.","title_source":"ROAD"} -{"issn":"2574-254X","issn_l":"2574-254X","title":"Child and adolescent obesity.","title_source":"ROAD"} -{"issn":"2574-3325","issn_l":"2574-3325","title":"Journal of research on the college president.","title_source":"ROAD"} -{"issn":"2239-6101","issn_l":"2239-5938","title":"European journal of sustainable development.","title_source":"ROAD"} \ No newline at end of file +{"issn":"2502-731X","issnL":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","titleSource":"ROAD"} +{"issn":"2502-7409","issnL":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","titleSource":"ROAD"} +{"issn":"2502-7433","issnL":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","titleSource":"ROAD"} +{"issn":"2502-745X","issnL":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","titleSource":"ROAD"} +{"issn":"2502-7549","issnL":"2502-7549","title":"ELang journal (An English Education journal)","titleSource":"ROAD"} +{"issn":"2423-3633","issnL":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-5563","issnL":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","titleSource":"ROAD"} +{"issn":"1735-434X","issnL":"1735-434X","title":"Iranian journal of animal biosystematics.","titleSource":"ROAD"} +{"issn":"2423-4435","issnL":"2008-6113","title":"Majallah-i jangal-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-4575","issnL":"2423-4575","title":"Ābziyān-i zinatī.","titleSource":"ROAD"} +{"issn":"2423-4974","issnL":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","titleSource":"ROAD"} +{"issn":"2380-0607","issnL":"2380-0607","title":"AIHM journal club.","titleSource":"ROAD"} +{"issn":"1085-4568","issnL":"1085-4568","title":"Frontiers.","titleSource":"ROAD"} +{"issn":"2380-8845","issnL":"2380-8845","title":"˜The œjournal of contemporary archival studies.","titleSource":"ROAD"} +{"issn":"2381-1803","issnL":"2381-1803","title":"International journal of complementary & alternative medicine.","titleSource":"ROAD"} +{"issn":"2381-2478","issnL":"2381-2478","title":"Palapala.","titleSource":"ROAD"} +{"issn":"2382-5170","issnL":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","titleSource":"ROAD"} +{"issn":"2382-9737","issnL":"2382-9737","title":"Majallah-i salāmat va bihdāsht","titleSource":"ROAD"} +{"issn":"2382-977X","issnL":"2382-977X","title":"UCT journal of research in science ,engineering and technology","titleSource":"ROAD"} +{"issn":"2382-9974","issnL":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","titleSource":"ROAD"} +{"issn":"2227-4782","issnL":"2227-4782","title":"Problemi endokrinnoï patologìï.","titleSource":"ROAD"} +{"issn":"2685-0079","issnL":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","titleSource":"ROAD"} +{"issn":"2574-0075","issnL":"2574-0075","title":"Hypermedia magazine.","titleSource":"ROAD"} +{"issn":"2574-0296","issnL":"2574-0296","title":"˜The œmuseum review.","titleSource":"ROAD"} +{"issn":"2574-0334","issnL":"2574-0334","title":"Bioactive compounds in health and disease.","titleSource":"ROAD"} +{"issn":"2574-108X","issnL":"2574-108X","title":"Journal of computer science integration.","titleSource":"ROAD"} +{"issn":"2574-254X","issnL":"2574-254X","title":"Child and adolescent obesity.","titleSource":"ROAD"} +{"issn":"2574-3325","issnL":"2574-3325","title":"Journal of research on the college president.","titleSource":"ROAD"} +{"issn":"2239-6101","issnL":"2239-5938","title":"European journal of sustainable development.","titleSource":"ROAD"} \ No newline at end of file From baed5e3337def46cef839a4010c2b4715d8e3f81 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:14:47 +0200 Subject: [PATCH 214/287] test classes moved in specific components --- .../dhp/common/collection/GetCSVTest.java | 246 ------------------ .../collection/models/CSVProgramme.java | 80 ------ .../common/collection/models/CSVProject.java | 46 ---- .../common/collection/models/DOAJModel.java | 52 ---- .../collection/models/UnibiGoldModel.java | 45 ---- .../project/DownloadCsvTest.java | 145 +++++++++++ .../oa/graph/hostedbymap/DownloadCsvTest.java | 139 ++++++++++ 7 files changed, 284 insertions(+), 469 deletions(-) delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java deleted file mode 100644 index c45f83828..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ /dev/null @@ -1,246 +0,0 @@ - -package eu.dnetlib.dhp.common.collection; - -import java.io.*; -import java.nio.file.Files; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.common.collection.models.CSVProgramme; -import eu.dnetlib.dhp.common.collection.models.CSVProject; -import eu.dnetlib.dhp.common.collection.models.DOAJModel; -import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; -import jdk.nashorn.internal.ir.annotations.Ignore; - -public class GetCSVTest { - - private static String workingDir; - - private static LocalFileSystem fs; - - @Disabled - @Test - void getProgrammeFileTest() throws Exception { - - String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; - - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); - if (count == 0) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); - Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); - Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); - Assertions.assertTrue(csvp.getShortTitle().equals("")); - Assertions.assertTrue(csvp.getLanguage().equals("en")); - } - if (count == 28) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); - Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); - Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); - Assertions.assertTrue(csvp.getLanguage().equals("de")); - } - if (count == 229) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); - Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); - Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); - Assertions.assertTrue(csvp.getLanguage().equals("en")); - } - Assertions.assertTrue(csvp.getCode() != null); - Assertions.assertTrue(csvp.getCode().startsWith("H2020")); - count += 1; - } - - Assertions.assertEquals(767, count); - } - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(GetCSVTest.class.getSimpleName()) - .toString(); - - fs = FileSystem.getLocal(new Configuration()); - } - - @Disabled - @Test - void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { - String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; - // String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv"; - - GetCSV - .getCsv( - fs, - new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) - // new BufferedReader(new FileReader(fileURL)) - , workingDir + "/projects", - "eu.dnetlib.dhp.common.collection.models.CSVProject", ';'); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); - if (count == 0) { - Assertions.assertTrue(csvp.getId().equals("771736")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); - Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG")); - - } - if (count == 22882) { - Assertions.assertTrue(csvp.getId().equals("752903")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); - Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); - } - if (count == 223023) { - Assertions.assertTrue(csvp.getId().equals("861952")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); - Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); - } - Assertions.assertTrue(csvp.getId() != null); - Assertions.assertTrue(csvp.getProgramme().startsWith("H2020")); - count += 1; - } - - Assertions.assertEquals(34957, count); - } - - @Disabled - @Test - void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ','); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); - if (count == 0) { - Assertions.assertTrue(unibi.getIssn().equals("0001-625X")); - Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X")); - Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica")); - - } - if (count == 43158) { - Assertions.assertTrue(unibi.getIssn().equals("2088-6330")); - Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330")); - Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); - - } - if (count == 67027) { - Assertions.assertTrue(unibi.getIssn().equals("2658-7068")); - Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488")); - Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); - } - - count += 1; - } - - Assertions.assertEquals(67028, count); - } - - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://doaj.org/csv"; - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - GetCSV - .getCsv( - fs, new BufferedReader( - new FileReader("/tmp/DOAJ_1.csv")), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.DOAJModel", ','); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - Assertions.assertEquals("0001-3765", doaj.getIssn()); - Assertions.assertEquals("1678-2690", doaj.getEissn()); - Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); - - } - if (count == 7904) { - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2055-7159", doaj.getEissn()); - Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { - - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2788-6298", doaj.getEissn()); - Assertions - .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); - } - - count += 1; - } - - Assertions.assertEquals(16713, count); - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java deleted file mode 100644 index d17fcc75e..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java +++ /dev/null @@ -1,80 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; -import com.opencsv.bean.CsvIgnore; - -/** - * The model for the programme csv file - */ -public class CSVProgramme implements Serializable { - - @CsvBindByName(column = "code") - private String code; - - @CsvBindByName(column = "title") - private String title; - - @CsvBindByName(column = "shortTitle") - private String shortTitle; - - @CsvBindByName(column = "language") - private String language; - - @CsvIgnore - private String classification; - - @CsvIgnore - private String classification_short; - - public String getClassification_short() { - return classification_short; - } - - public void setClassification_short(String classification_short) { - this.classification_short = classification_short; - } - - public String getClassification() { - return classification; - } - - public void setClassification(String classification) { - this.classification = classification; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getShortTitle() { - return shortTitle; - } - - public void setShortTitle(String shortTitle) { - this.shortTitle = shortTitle; - } - - public String getLanguage() { - return language; - } - - public void setLanguage(String language) { - this.language = language; - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java deleted file mode 100644 index 62c847907..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ /dev/null @@ -1,46 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - - @CsvBindByName(column = "id") - private String id; - - @CsvBindByName(column = "programme") - private String programme; - - @CsvBindByName(column = "topics") - private String topics; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java deleted file mode 100644 index 156ba3632..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java +++ /dev/null @@ -1,52 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java deleted file mode 100644 index ae47262bf..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java +++ /dev/null @@ -1,45 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java new file mode 100644 index 000000000..700b9e632 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java @@ -0,0 +1,145 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + CSVProgramme.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + assertTrue(csvp.getShortTitle().equals("")); + assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + assertTrue(csvp.getLanguage().equals("en")); + } + assertTrue(csvp.getCode() != null); + assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) + , workingDir + "/projects", + CSVProject.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + assertTrue(csvp.getId().equals("771736")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + assertTrue(csvp.getId().equals("752903")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + assertTrue(csvp.getId().equals("861952")); + assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + assertTrue(csvp.getId() != null); + assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java new file mode 100644 index 000000000..b93320e1a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -0,0 +1,139 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import java.io.*; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + UnibiGoldModel.class.getName()); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + Assertions.assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + GetCSV + .getCsv( + fs, new BufferedReader( + new FileReader("/tmp/DOAJ_1.csv")), + workingDir + "/programme", + DOAJModel.class.getName()); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + Assertions.assertEquals("0001-3765", doaj.getIssn()); + Assertions.assertEquals("1678-2690", doaj.getEissn()); + Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + + } + if (count == 7904) { + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2055-7159", doaj.getEissn()); + Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { + + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2788-6298", doaj.getEissn()); + Assertions + .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); + } + + count += 1; + } + + Assertions.assertEquals(16713, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} From c3ad4ab701af352805b776a23ee9db7c426adab9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:23:15 +0200 Subject: [PATCH 215/287] minor fixes --- .../oa/graph/hostedbymap/SparkProduceHostedByMap.scala | 4 ++-- .../oa/graph/hostedbymap/download_csv_parameters.json | 9 +++------ .../dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 6 ++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index d0c603e29..1ee1d5d1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -209,8 +209,8 @@ object SparkProduceHostedByMap { Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) - .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) - .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index fba048343..cf417c675 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -1,13 +1,10 @@ - [ - { "paramName":"fu", "paramLongName":"fileURL", "paramDescription": "the url to download the csv file ", "paramRequired": true }, - { "paramName":"wp", "paramLongName":"workingPath", @@ -27,9 +24,9 @@ "paramRequired": true }, { - "paramName": "sr", - "paramLongName": "replace", - "paramDescription": "true if the input file has to be cleaned before parsing", + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "csv delimiter character", "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index d7b85b0cb..870b4ba0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -78,7 +78,6 @@ - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -92,7 +91,6 @@ - @@ -100,7 +98,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -113,7 +111,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj From 7ee2757fcd97d2d842638f4e52290df8217f465b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:41:01 +0200 Subject: [PATCH 216/287] fixed DownloadCSV parameters spec; workflow patching the hostedby replaces the graph content (publication, datasource) rather than creating a copy --- .../SparkApplyHostedByMapToDatasource.scala | 13 ++++++++----- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 7 +++++-- .../graph/hostedbymap/download_csv_parameters.json | 6 ++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 424567830..1b18ba3ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -27,8 +27,8 @@ object SparkApplyHostedByMapToDatasource { d })(Encoders.bean((classOf[Datasource]))) } - def main(args: Array[String]): Unit = { + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() @@ -41,18 +41,15 @@ object SparkApplyHostedByMapToDatasource { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - val graphPath = parser.get("graphPath") - val outputPath = parser.get("outputPath") val preparedInfoPath = parser.get("preparedInfoPath") - implicit val formats = DefaultFormats - implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") @@ -62,6 +59,12 @@ object SparkApplyHostedByMapToDatasource { .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 8b0b45513..db7ab4ac0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -75,8 +75,11 @@ object SparkApplyHostedByMapToResult { applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) - - + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index cf417c675..22c65eb35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -11,6 +11,12 @@ "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", "paramRequired": true }, + { + "paramName":"of", + "paramLongName":"outputFile", + "paramDescription": "the output json file produced by the CSV downlaod procedure", + "paramRequired": true + }, { "paramName": "hnn", "paramLongName": "hdfsNameNode", From 17cefe6a97edd8434ce806af5064ce1b766adedd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:43:59 +0200 Subject: [PATCH 217/287] [HBM] removed stale replace option --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 870b4ba0f..a03f4e36a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -117,7 +117,6 @@ --workingPath${workingDir}/doaj --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel - --replacetrue From 5f0903d50d6abff7ae5ae98a71b53d0045d62ae8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 14:17:54 +0200 Subject: [PATCH 218/287] fixed CSV downloader & tests --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 26 ++++++++---- .../oa/graph/hostedbymap/DownloadCsvTest.java | 42 ++++++++----------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index c8329c99c..be35d31f8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap; import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; +import eu.dnetlib.dhp.common.collection.CollectorException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -56,30 +59,37 @@ public class DownloadCSV { .orElse(DEFAULT_DELIMITER); log.info("delimiter {}", delimiter); - final HttpConnector2 connector2 = new HttpConnector2(); - Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); + + new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + + } + + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { + + final HttpConnector2 connector2 = new HttpConnector2(); + final Path path = new Path(workingPath + "/replaced.csv"); try (BufferedReader in = new BufferedReader( new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - try (FSDataOutputStream fos = fileSystem.create(path, true)) { + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), Charset.defaultCharset()))) { String line; while ((line = in.readLine()) != null) { - fos.writeUTF(line.replace("\\\"", "\"")); - fos.writeUTF("\n"); + writer.write(line.replace("\\\"", "\"")); + writer.newLine(); } } } - try (InputStreamReader reader = new InputStreamReader(fileSystem.open(path))) { - GetCSV.getCsv(fileSystem, reader, outputFile, classForName, delimiter); + try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index b93320e1a..7b02025fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -39,14 +39,16 @@ public class DownloadCsvTest { String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - UnibiGoldModel.class.getName()); + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV().doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); String line; int count = 0; @@ -82,24 +84,16 @@ public class DownloadCsvTest { String fileURL = "https://doaj.org/csv"; - try (BufferedReader in = new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV().doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + DOAJModel.class.getName(), + ',', + fs); - GetCSV - .getCsv( - fs, new BufferedReader( - new FileReader("/tmp/DOAJ_1.csv")), - workingDir + "/programme", - DOAJModel.class.getName()); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); String line; int count = 0; From f74adc475275b32abb4a66a6ae3844fd566f4bff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 15:52:15 +0200 Subject: [PATCH 219/287] added DownloadCSV2 as alternative implementation of the same download procedure --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 14 +- .../oa/graph/hostedbymap/DownloadCSV2.java | 84 +++++++ .../hostedbymap/download_csv_parameters.json | 6 +- .../graph/hostedbymap/oozie_app/workflow.xml | 13 +- .../oa/graph/hostedbymap/DownloadCsvTest.java | 210 ++++++++++-------- 5 files changed, 214 insertions(+), 113 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index be35d31f8..dff761c34 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -7,16 +7,15 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; -import eu.dnetlib.dhp.common.collection.CollectorException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.GetCSV; import eu.dnetlib.dhp.common.collection.HttpConnector2; @@ -68,8 +67,9 @@ public class DownloadCSV { } - protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, char delimiter, FileSystem fs) - throws IOException, ClassNotFoundException, CollectorException { + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, + char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { final HttpConnector2 connector2 = new HttpConnector2(); @@ -78,11 +78,11 @@ public class DownloadCSV { try (BufferedReader in = new BufferedReader( new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), Charset.defaultCharset()))) { + try (PrintWriter writer = new PrintWriter( + new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { String line; while ((line = in.readLine()) != null) { - writer.write(line.replace("\\\"", "\"")); - writer.newLine(); + writer.println(line.replace("\\\"", "\"")); } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java new file mode 100644 index 000000000..d82d00862 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV2 { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String tmpFile = parser.get("tmpFile"); + log.info("tmpFile {}", tmpFile); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + HttpConnector2 connector2 = new HttpConnector2(); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); + } finally { + FileUtils.deleteQuietly(new File(tmpFile)); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index 22c65eb35..50fbb00f0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -6,9 +6,9 @@ "paramRequired": true }, { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramName":"tf", + "paramLongName":"tmpFile", + "paramDescription": "the temporary local file storing the cleaned CSV contents for unibi gold list and doj artciles", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index a03f4e36a..84035fe4e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -73,7 +73,8 @@ - ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'download_csv'} @@ -98,10 +99,10 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 --hdfsNameNode${nameNode} --fileURL${unibiFileURL} - --workingPath${workingDir}/unibi_gold + --tmpFile/tmp/unibi_gold_replaced.csv --outputFile${workingDir}/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel @@ -111,10 +112,10 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 --hdfsNameNode${nameNode} --fileURL${doajFileURL} - --workingPath${workingDir}/doaj + --tmpFile/tmp/doaj_replaced.csv --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel @@ -141,7 +142,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --datasourcePath${sourcePath}/datasource - --workingPath${workingDir} + --workingPath/user/${wf:user()}/data --outputPath${hostedByMapPath} --masteryarn-cluster diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index 7b02025fb..edf74fc6a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -1,133 +1,149 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.GetCSV; -import eu.dnetlib.dhp.common.collection.HttpConnector2; -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.*; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.*; -import java.nio.file.Files; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.assertTrue; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; public class DownloadCsvTest { - private static String workingDir; + private static final Logger log = LoggerFactory.getLogger(DownloadCsvTest.class); - private static LocalFileSystem fs; + private static String workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(DownloadCsvTest.class.getSimpleName()) - .toString(); + private static LocalFileSystem fs; - fs = FileSystem.getLocal(new Configuration()); - } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); - @Disabled - @Test - void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + fs = FileSystem.getLocal(new Configuration()); + } - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - final String outputFile = workingDir + "/unibi_gold.json"; - new DownloadCSV().doDownload( - fileURL, - workingDir + "/unibi_gold", - outputFile, - UnibiGoldModel.class.getName(), - ',', - fs); + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); - if (count == 0) { - assertTrue(unibi.getIssn().equals("0001-625X")); - assertTrue(unibi.getIssnL().equals("0001-625X")); - assertTrue(unibi.getTitle().equals("Acta Mycologica")); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - } - if (count == 43158) { - assertTrue(unibi.getIssn().equals("2088-6330")); - assertTrue(unibi.getIssnL().equals("2088-6330")); - assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); - } - if (count == 67027) { - assertTrue(unibi.getIssn().equals("2658-7068")); - assertTrue(unibi.getIssnL().equals("2308-2488")); - assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); - } + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); - count += 1; - } + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } - Assertions.assertEquals(67028, count); - } + count += 1; + } - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + assertEquals(67028, count); + } - String fileURL = "https://doaj.org/csv"; + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - final String outputFile = workingDir + "/doaj.json"; - new DownloadCSV().doDownload( - fileURL, - workingDir + "/doaj", - outputFile, - DOAJModel.class.getName(), - ',', - fs); + String fileURL = "https://doaj.org/csv"; - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + DOAJModel.class.getName(), + ',', + fs); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - Assertions.assertEquals("0001-3765", doaj.getIssn()); - Assertions.assertEquals("1678-2690", doaj.getEissn()); - Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - } - if (count == 7904) { - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2055-7159", doaj.getEissn()); - Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + assertEquals("0001-3765", doaj.getIssn()); + assertEquals("1678-2690", doaj.getEissn()); + assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + } + if (count == 22) { + log.info(new ObjectMapper().writeValueAsString(doaj)); + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + } + if (count == 7904) { + // log.info(new ObjectMapper().writeValueAsString(doaj)); + assertEquals("", doaj.getIssn()); + assertEquals("2055-7159", doaj.getEissn()); + assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2788-6298", doaj.getEissn()); - Assertions - .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); - } + assertEquals("2783-1043", doaj.getIssn()); + assertEquals("2783-1051", doaj.getEissn()); + assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); + } - count += 1; - } + count += 1; + } - Assertions.assertEquals(16713, count); - } + assertEquals(16715, count); + } - @AfterAll - public static void cleanup() { - FileUtils.deleteQuietly(new File(workingDir)); - } + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } } From 7743d0f9193e2fc9837f7e101ed25e398296b8c3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 16:14:54 +0200 Subject: [PATCH 220/287] consolidated dnet wf profiles into the same submodule --- .../dedup}/dataset_dedup_configuration.xml | 0 ...herresearchproduct_dedup_configuration.xml | 0 .../publication_dedup_configuration.xml | 0 .../result_deduplication_orchestrator.xml | 0 .../dedup}/software_dedup_configuration.xml | 0 .../dnetlib/dhp/provision/02_beta_graph.xml | 25 + dhp-workflows/dhp-workflow-profiles/pom.xml | 27 - .../wf/profiles/graph_beta_construction.xml | 779 ------------------ .../wf/profiles/graph_prod_construction.xml | 705 ---------------- .../dhp/wf/profiles/graph_to_hiveDB.xml | 73 -- .../dnetlib/dhp/wf/profiles/update_solr.xml | 98 --- .../dnetlib/dhp/wf/profiles/update_stats.xml | 74 -- 12 files changed, 25 insertions(+), 1756 deletions(-) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/dataset_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/otherresearchproduct_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/publication_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/result_deduplication_orchestrator.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/software_dedup_configuration.xml (100%) delete mode 100644 dhp-workflows/dhp-workflow-profiles/pom.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 766783f8b..344e99044 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -660,6 +660,31 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'produceHBM', + 'hostedByMapPath' : '/user/dnet.beta/data', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/beta_provision/working_dir/hostedbymap' + } + + build-report + diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml deleted file mode 100644 index 54e76c1e2..000000000 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - dhp-workflows - eu.dnetlib.dhp - 1.2.4-SNAPSHOT - - 4.0.0 - - dhp-workflow-profiles - jar - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml deleted file mode 100644 index 08ed24cd0..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml +++ /dev/null @@ -1,779 +0,0 @@ - -
- - - - - -
- - Graph Construction [HYBRID] - Data Provision - 30 - - - - reuse cached content from the PROD aggregation system - - reuseProdContent - true - - - - - - - - set the PROD aggregator content path - - prodContentPath - /tmp/core_aggregator - - - - - - - - Set the path containing the PROD AGGREGATOR graph - - prodAggregatorGraphPath - /tmp/core_provision/graph/00_prod_graph_aggregator - - - - - - - - reuse cached content from the BETA aggregation system - - reuseBetaContent - true - - - - - - - - set the BETA aggregator content path - - betaContentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the BETA AGGREGATOR graph - - betaAggregatorGraphPath - /tmp/core_provision/graph/00_beta_graph_aggregator - - - - - - - - Set the IS lookup service address - - isLookUpUrl - http://services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the target path to store the MERGED graph - - mergedGraphPath - /tmp/core_provision/graph/01_graph_merged - - - - - - - - Set the target path to store the RAW graph - - rawGraphPath - /tmp/core_provision/graph/02_graph_raw - - - - - - - - Set the target path to store the DEDUPED graph - - dedupGraphPath - /tmp/core_provision/graph/03_graph_dedup - - - - - - - - Set the target path to store the INFERRED graph - - inferredGraphPath - /tmp/core_provision/graph/04_graph_inferred - - - - - - - - Set the target path to store the CONSISTENCY graph - - consistentGraphPath - /tmp/core_provision/graph/05_graph_consistent - - - - - - - - Set the target path to store the ORCID enriched graph - - orcidGraphPath - /tmp/core_provision/graph/06_graph_orcid - - - - - - - - Set the target path to store the BULK TAGGED graph - - bulkTaggingGraphPath - /tmp/core_provision/graph/07_graph_bulktagging - - - - - - - - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph - - affiliationGraphPath - /tmp/core_provision/graph/08_graph_affiliation - - - - - - - - Set the target path to store the COMMUNITY from SELECTED SOURCES graph - - communityOrganizationGraphPath - /tmp/core_provision/graph/09_graph_comunity_organization - - - - - - - - Set the target path to store the FUNDING from SEMANTIC RELATION graph - - fundingGraphPath - /tmp/core_provision/graph/10_graph_funding - - - - - - - - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph - - communitySemRelGraphPath - /tmp/core_provision/graph/11_graph_comunity_sem_rel - - - - - - - - Set the target path to store the COUNTRY enriched graph - - countryGraphPath - /tmp/core_provision/graph/12_graph_country - - - - - - - - Set the target path to store the CLEANED graph - - cleanedGraphPath - /tmp/core_provision/graph/13_graph_cleaned - - - - - - - - Set the target path to store the blacklisted graph - - blacklistedGraphPath - /tmp/core_provision/graph/14_graph_blacklisted - - - - - - - - Set the map of paths for the Bulk Tagging - - bulkTaggingPathMap - {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} - - - - - - - - Set the map of associations organization, community list for the propagation of community to result through organization - - propagationOrganizationCommunityMap - {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} - - - - - - - - - Set the dedup orchestrator name - - dedupConfig - decisiontree-dedup-test - - - - - - - - declares the ActionSet ids to promote in the RAW graph - - actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent - - - - - - - - declares the ActionSet ids to promote in the INFERRED graph - - actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 - - - - - - - - wait configurations - - - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'betaAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseBetaContent', - 'contentPath' : 'betaContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/beta_aggregator' - } - - build-report - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'prodAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseProdContent', - 'contentPath' : 'prodContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/prod_aggregator' - } - - build-report - - - - - - - - wait configurations - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'betaInputGgraphPath' : 'betaAggregatorGraphPath', - 'prodInputGgraphPath' : 'prodAggregatorGraphPath', - 'graphOutputPath' : 'mergedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/merge/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/merge_graph' - } - - build-report - - - - - - - - create the RAW graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'mergedGraphPath', - 'outputGraphRootPath' : 'rawGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsRaw' - } - - build-report - - - - - - - - search for duplicates in the raw graph - - executeOozieJob - IIS - - { - 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'rawGraphPath', - 'dedupGraphPath': 'dedupGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' - } - - build-report - - - - - - - - create the INFERRED graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'dedupGraphPath', - 'outputGraphRootPath' : 'inferredGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsIIS' - } - - build-report - - - - - - - - mark duplicates as deleted and redistribute the relationships - - executeOozieJob - IIS - - { - 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' - } - - build-report - - - - - - - - propagates ORCID among results linked by allowedsemrels semantic relationships - - executeOozieJob - IIS - - { - 'sourcePath' : 'consistentGraphPath', - 'outputPath': 'orcidGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - mark results respecting some rules as belonging to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'orcidGraphPath', - 'outputPath': 'bulkTaggingGraphPath', - 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/bulktag' - } - - build-report - - - - - - - - creates relashionships between results and organizations when the organizations are associated to institutional repositories - - executeOozieJob - IIS - - { - 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/affiliation', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap - - executeOozieJob - IIS - - { - 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'communityOrganizationGraphPath', - 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_organization', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects - - executeOozieJob - IIS - - { - 'sourcePath' : 'communityOrganizationGraphPath', - 'outputPath': 'fundingGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/funding', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'fundingGraphPath', - 'outputPath': 'communitySemRelGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from - - executeOozieJob - IIS - - { - 'sourcePath' : 'communitySemRelGraphPath', - 'outputPath': 'countryGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/core_provision/working_dir/country', - 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'countryGraphPath', - 'graphOutputPath': 'cleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/clean' - } - - build-report - - - - - - - - removes blacklisted relations - - executeOozieJob - IIS - - { - 'sourcePath' : 'cleanedGraphPath', - 'outputPath': 'blacklistedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml deleted file mode 100644 index 4eab12c73..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml +++ /dev/null @@ -1,705 +0,0 @@ - -
- - - - - -
- - Graph Construction [PROD] - Data Provision - 30 - - - - reuse cached content from the aggregation system - - reuseContent - true - - - - - - - - set the aggregator content path - - contentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the AGGREGATOR graph - - aggregatorGraphPath - /tmp/beta_provision/graph/00_graph_aggregator - - - - - - - - Set the target path to store the RAW graph - - rawGraphPath - /tmp/beta_provision/graph/01_graph_raw - - - - - - - - Set the target path to store the first CLEANED graph - - firstCleanedGraphPath - /tmp/prod_provision/graph/02_graph_first_cleaned - - - - - - - - Set the target path to store the DEDUPED graph - - dedupGraphPath - /tmp/beta_provision/graph/03_graph_dedup - - - - - - - - Set the target path to store the INFERRED graph - - inferredGraphPath - /tmp/beta_provision/graph/04_graph_inferred - - - - - - - - Set the target path to store the CONSISTENCY graph - - consistentGraphPath - /tmp/beta_provision/graph/05_graph_consistent - - - - - - - - Set the target path to store the ORCID enriched graph - - orcidGraphPath - /tmp/beta_provision/graph/06_graph_orcid - - - - - - - - Set the target path to store the BULK TAGGED graph - - bulkTaggingGraphPath - /tmp/beta_provision/graph/07_graph_bulktagging - - - - - - - - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph - - affiliationGraphPath - /tmp/beta_provision/graph/08_graph_affiliation - - - - - - - - Set the target path to store the COMMUNITY from SELECTED SOURCES graph - - communityOrganizationGraphPath - /tmp/beta_provision/graph/09_graph_comunity_organization - - - - - - - - Set the target path to store the FUNDING from SEMANTIC RELATION graph - - fundingGraphPath - /tmp/beta_provision/graph/10_graph_funding - - - - - - - - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph - - communitySemRelGraphPath - /tmp/beta_provision/graph/11_graph_comunity_sem_rel - - - - - - - - Set the target path to store the COUNTRY enriched graph - - countryGraphPath - /tmp/beta_provision/graph/12_graph_country - - - - - - - - Set the target path to store the CLEANED graph - - cleanedGraphPath - /tmp/beta_provision/graph/13_graph_cleaned - - - - - - - - Set the target path to store the blacklisted graph - - blacklistedGraphPath - /tmp/beta_provision/graph/14_graph_blacklisted - - - - - - - - Set the lookup address - - isLookUpUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the map of paths for the Bulk Tagging - - bulkTaggingPathMap - {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} - - - - - - - - Set the map of associations organization, community list for the propagation of community to result through organization - - propagationOrganizationCommunityMap - {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], "20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], "20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} - - - - - - - - - Set the dedup orchestrator name - - dedupConfig - decisiontree-dedup-test - - - - - - - - declares the ActionSet ids to promote in the RAW graph - - actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent - - - - - - - - declares the ActionSet ids to promote in the INFERRED graph - - actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 - - - - - - - - wait configurations - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'aggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseContent', - 'contentPath' : 'contentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/beta_provision/working_dir/aggregator' - } - - build-report - - - - - - - - create the RAW graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'aggregatorGraphPath', - 'outputGraphRootPath' : 'rawGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'rawGraphPath', - 'graphOutputPath': 'firstCleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/first_clean' - } - - build-report - - - - - - - - search for duplicates in the raw graph - - executeOozieJob - IIS - - { - 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'firstCleanedGraphPath', - 'dedupGraphPath': 'dedupGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' - } - - build-report - - - - - - - - create the INFERRED graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'dedupGraphPath', - 'outputGraphRootPath' : 'inferredGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' - } - - build-report - - - - - - - - mark duplicates as deleted and redistribute the relationships - - executeOozieJob - IIS - - { - 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' - } - - build-report - - - - - - - - propagates ORCID among results linked by allowedsemrels semantic relationships - - executeOozieJob - IIS - - { - 'sourcePath' : 'consistentGraphPath', - 'outputPath': 'orcidGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - mark results respecting some rules as belonging to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'orcidGraphPath', - 'outputPath': 'bulkTaggingGraphPath', - 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' - } - - build-report - - - - - - - - creates relashionships between results and organizations when the organizations are associated to institutional repositories - - executeOozieJob - IIS - - { - 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap - - executeOozieJob - IIS - - { - 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'communityOrganizationGraphPath', - 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects - - executeOozieJob - IIS - - { - 'sourcePath' : 'communityOrganizationGraphPath', - 'outputPath': 'fundingGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/funding', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'fundingGraphPath', - 'outputPath': 'communitySemRelGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from - - executeOozieJob - IIS - - { - 'sourcePath' : 'communitySemRelGraphPath', - 'outputPath': 'countryGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/beta_provision/working_dir/country', - 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'countryGraphPath', - 'graphOutputPath': 'cleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/clean' - } - - build-report - - - - - - - - removes blacklisted relations - - executeOozieJob - IIS - - { - 'sourcePath' : 'cleanedGraphPath', - 'outputPath': 'blacklistedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml deleted file mode 100644 index 0ace12ea3..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml +++ /dev/null @@ -1,73 +0,0 @@ - -
- - - - - -
- - Graph to HiveDB [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - inputPath - - - - - - - - Set the target path to store the RAW graph - - hiveDbName - - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'inputPath' : 'inputPath', - 'hiveDbName' : 'hiveDbName' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/hive/oozie_app' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml deleted file mode 100644 index 8a7738bcf..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml +++ /dev/null @@ -1,98 +0,0 @@ - -
- - - - - -
- - Update Solr [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - inputGraphRootPath - - - - - - - - Set the target path to store the RAW graph - - format - TMF - - - - - - - Set the lookup address - - isLookupUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'inputGraphRootPath' : 'inputGraphRootPath', - 'isLookupUrl' : 'isLookupUrl', - 'format' : 'format' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/provision/oozie_app', - 'maxRelations' : '100', - 'relPartitions' : '3000', - 'batchSize' : '2000', - 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments', - 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', - 'resumeFrom' : 'prepare_relations', - 'sparkDriverMemoryForJoining' : '3G', - 'sparkExecutorMemoryForJoining' : '7G', - 'sparkExecutorCoresForJoining' : '4', - 'sparkDriverMemoryForIndexing' : '2G', - 'sparkExecutorMemoryForIndexing' : '2G', - 'sparkExecutorCoresForIndexing' : '64', - 'sparkNetworkTimeout' : '600', - 'workingDir' : '/tmp/beta_provision/working_dir/update_solr' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml deleted file mode 100644 index a91b6302e..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml +++ /dev/null @@ -1,74 +0,0 @@ - -
- - - - - -
- - Update Stats [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - openaire_db_name - - - - - - - - Set the target path to store the RAW graph - - stats_db_name - - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'openaire_db_name' : 'openaire_db_name', - 'stats_db_name' : 'stats_db_name' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/stats/oozie_app', - 'hive_timeout' : '3000' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file From ed7e28490a2cf0c08a7b67240b28bfebe6f290cc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 16:19:01 +0200 Subject: [PATCH 221/287] change in sh --- .../dhp/doiboost/crossref_dump_reader/oozie_app/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh index 2ce704283..bb7ec0b45 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put $2/$3 \ No newline at end of file +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file From 6fec71e8d2bb2d27b73d83cc273bd20b2465ceb6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 16:39:02 +0200 Subject: [PATCH 222/287] removed the specific of the infra we are running the wf from the wf name --- .../eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml | 2 +- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 3700ce5d9..ab3b9593e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 831ff5a57..f5596b60e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory From bc7068106c9b5ae1737efcd2556da267f326541a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:19:44 +0200 Subject: [PATCH 223/287] added crossref download oozie workflow --- .../oozie_app/config-default.xml | 42 ++++++ .../oozie_app/workflow.xml | 128 ++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml new file mode 100644 index 000000000..508202e30 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index e69de29bb..6e4f17912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -0,0 +1,128 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + crossrefdumptoken + the token for the API dump path + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + ${crossrefdumptoken} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefdumpfilename} + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + + \ No newline at end of file From 82086f3422955934561a65ef5d8b8526ffbbfce4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:42:14 +0200 Subject: [PATCH 224/287] fixed directory name --- .../eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml | 0 .../resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml | 0 .../resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml | 0 .../dnetlib/dhp/actionmanager/actionset_h2020_classification.xml | 0 .../eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml | 0 .../main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml | 0 .../eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml | 0 .../dhp/dedup/otherresearchproduct_dedup_configuration.xml | 0 .../eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml | 0 .../eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml | 0 .../eu/dnetlib/dhp/dedup/software_dedup_configuration.xml | 0 .../resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml | 0 .../resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml | 0 .../main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml | 0 .../main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml | 0 21 files changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml (100%) diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml From 7c0c67bdd66e602bbc169358bd3a678f0edee931 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:45:53 +0200 Subject: [PATCH 225/287] added mock pom --- dhp-workflows/dhp-workflow-profiles/pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 dhp-workflows/dhp-workflow-profiles/pom.xml diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml new file mode 100644 index 000000000..b1c51c497 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -0,0 +1,12 @@ + + + + dhp-workflows + eu.dnetlib.dhp + 1.2.4-SNAPSHOT + + 4.0.0 + + dhp-workflow-profiles + + \ No newline at end of file From e5cf11d088655b2dfd3ca76781bb820aa847fbbc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 19 Aug 2021 10:29:04 +0200 Subject: [PATCH 226/287] change open access route to result matching hbm to gold --- .../oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index db7ab4ac0..0e047d016 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -30,7 +30,7 @@ object SparkApplyHostedByMapToResult { inst.getHostedby.setValue(ei.getName) if (ei.getOpenAccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) - inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } From 812bd54c57d63994fde566bb4f6a41bf1cee5d2f Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 19 Aug 2021 11:30:14 +0200 Subject: [PATCH 227/287] different funders in blacklist from BETA and PROD aggregator --- .../dhp/provision/00_beta_graph_for_IIS.xml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index ef2205e32..2fed35f44 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,10 +11,20 @@ IIS 30 - - set blacklist of funder nsPrefixes + + set blacklist of funder nsPrefixes from the beta aggregator - nsPrefixBlacklist + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD gsrt________,rcuk________ @@ -375,7 +385,7 @@ 'reuseOAF' : 'reuseOAF_BETA', 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', 'contentPath' : 'betaContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', 'shouldPatchRelations' : 'shouldPatchRelations_BETA', 'idMappingPath' : 'idMappingPath' } @@ -421,7 +431,7 @@ 'reuseOAF' : 'reuseOAF_PROD', 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', 'contentPath' : 'prodContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', 'shouldPatchRelations' : 'shouldPatchRelations_PROD', 'idMappingPath' : 'idMappingPath' } From a053e1513cf489f997c88d583d2d59a8a10ae378 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 19 Aug 2021 11:32:27 +0200 Subject: [PATCH 228/287] different funders in blacklist from BETA and PROD aggregator --- .../dnetlib/dhp/provision/02_beta_graph.xml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 344e99044..f83337b3c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,10 +11,20 @@ Data Provision 30 - - set blacklist of funder nsPrefixes + + set blacklist of funder nsPrefixes from the beta aggregator - nsPrefixBlacklist + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD gsrt________,rcuk________ @@ -497,7 +507,7 @@ 'reuseOAF' : 'reuseOAF_BETA', 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', 'contentPath' : 'betaContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', 'shouldPatchRelations' : 'shouldPatchRelations_BETA', 'idMappingPath' : 'idMappingPath' } @@ -543,7 +553,7 @@ 'reuseOAF' : 'reuseOAF_PROD', 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', 'contentPath' : 'prodContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', 'shouldPatchRelations' : 'shouldPatchRelations_PROD', 'idMappingPath' : 'idMappingPath' } From 65822400ce9fc1597a1f7bc5674c536047c55dfd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:10:35 +0200 Subject: [PATCH 229/287] CrossrefDump - added new parameter file that was missing --- .../generate_dataset_params.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json new file mode 100644 index 000000000..d903ebe1d --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json @@ -0,0 +1,16 @@ +[ + {"paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the source path", + "paramRequired": true}, + {"paramName":"m", + "paramLongName":"master", + "paramDescription": "the master name", + "paramRequired": true}, + {"paramName":"t", + "paramLongName":"targetPath", + "paramDescription": "the target path", + "paramRequired": true} + +] + From f3b6c392c1655511464f388824ff80ca564410f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:10:58 +0200 Subject: [PATCH 230/287] CrossrefDump - moving parameter file under folder crossref_dump_reader --- .../doiboost/{ => crossref_dump_reader}/crossref_dump_reader.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{ => crossref_dump_reader}/crossref_dump_reader.json (100%) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json From 35880c0e7b35b808c612dfe7efcde4e091d7fd35 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:11:35 +0200 Subject: [PATCH 231/287] CrossrefDump - changed the wf to be able to resume from one of the steps --- .../crossref_dump_reader/oozie_app/workflow.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 6e4f17912..c915783aa 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -41,6 +41,16 @@ + + + + + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} + ${wf:conf('resumeFrom') eq 'Unpack'} + + + + From 45c62609af435e1fbd93690a8121261476befa8d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:12:31 +0200 Subject: [PATCH 232/287] CrossrefDump - modified because parameter file was moved --- .../eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7d3770a0..d1861ff0a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -26,7 +26,7 @@ public class ExtractCrossrefRecords { .toString( ExtractCrossrefRecords.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); From 882abb40e466a2232a879b9113a0c28b2362848b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:12:53 +0200 Subject: [PATCH 233/287] CrossrefDump - --- .../dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index c915783aa..4a98a4bb4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -51,9 +51,6 @@ - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 4c1474e6938cdd9cef37cec895b8aa232ac59758 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 20 Aug 2021 17:03:30 +0200 Subject: [PATCH 234/287] Dealing with #6859#note-2: we have to decode URLs to avoid & and other chars encoded becasue of the original XML representation of data --- .../dhp/oa/graph/raw/OafToOafMapper.java | 9 +++ .../dhp/oa/graph/raw/OdfToOafMapper.java | 18 ++++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 35 +++++++++ .../dnetlib/dhp/oa/graph/raw/encoded-url.xml | 40 ++++++++++ .../dhp/oa/graph/raw/encoded-url_odf.xml | 75 +++++++++++++++++++ 5 files changed, 173 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 2b49a9dc1..5f9491029 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -164,6 +166,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .filter(n -> StringUtils.isNotBlank(n.getText())) .map(n -> n.getText().trim()) .filter(u -> u.startsWith("http")) + .map(s -> { + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return s; + } + }) .distinct() .collect(Collectors.toCollection(ArrayList::new))); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 02aab4f16..a365ac9aa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.*; import java.util.stream.Collectors; @@ -137,17 +139,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Set url = new HashSet<>(); for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) { @@ -163,6 +165,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return Arrays.asList(instance); } + protected String trimAndDecodeUrl(String url){ + try { + return URLDecoder.decode(url.trim(), "UTF-8"); + } catch (UnsupportedEncodingException e) { + return url; + } + } + @Override protected List> prepareSources(final Document doc, final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5228d1d02..cc903b49e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; +import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; @@ -759,6 +760,40 @@ class MappersTest { assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); } + + @Test + void testXMLEncodedURL() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml"))); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + String decoded = "https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS"; + assertEquals(decoded, p.getInstance().get(0).getUrl().get(0)); + } + + @Test + void testXMLEncodedURL_ODF() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Dataset p = (Dataset) list.get(0); + assertTrue(p.getInstance().size() > 0); + for(String url : p.getInstance().get(0).getUrl()){ + System.out.println(url); + assertTrue(!url.contains("&")); + } + } + private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml new file mode 100644 index 000000000..c9cafea4f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml @@ -0,0 +1,40 @@ + + +
+ r3c4b2081b22::0001f1a60b1acbb28dd66b4f6c7d881f + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + 2021-06-24T10:40:25.346Z + r3c4b2081b22 + 2021-06-24T10:40:55.153Z +
+ + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + Progress report on preparing Strategic plan for Technology transfer from EPPL/FMPI CU + OPEN + + Documents, reports + 0034 + Progress report on preparation process of the technology transfer plan for CU. + corda__h2020::692335 + + + + + file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fopendata%2Fcordis-h2020projectDeliverables.tsv + + + + + + + false + false + 0.9 + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml new file mode 100644 index 000000000..b970605a6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059&test=yes + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059&test=yes + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. + + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file From 412d2cb16a6c3f6352ec720cf2e2bf591ca00f7d Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Mon, 23 Aug 2021 14:32:00 +0200 Subject: [PATCH 235/287] added dependencies to classgraph and opencsv. Bumped version of dhp-schemas --- pom.xml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1a3523f2b..99525ef85 100644 --- a/pom.xml +++ b/pom.xml @@ -206,6 +206,12 @@ 1.0.7 + + me.xuender + unidecode + 0.0.7 + + com.google.guava guava @@ -514,6 +520,17 @@ ${common.text.version} + + com.opencsv + opencsv + 5.5 + + + io.github.classgraph + classgraph + 4.8.71 + + @@ -736,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.15-SNAPSHOT] + [2.7.17] [4.0.3] [6.0.5] [3.1.6] From f19b04d41b0a13503a7c5e559c4bf3b330559c75 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Mon, 23 Aug 2021 14:33:39 +0200 Subject: [PATCH 236/287] code formatting after mvn compile --- .../raw/MigrateDbEntitiesApplicationTest.java | 166 +++++++++--------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 2078735f3..b65bd9fd8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -51,8 +51,11 @@ public class MigrateDbEntitiesApplicationTest { public void setUp() { lenient() .when(vocs.getTermAsQualifier(anyString(), anyString())) - .thenAnswer(invocation -> OafMapperUtils - .qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0))); + .thenAnswer( + invocation -> OafMapperUtils + .qualifier( + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), + invocation.getArgument(0))); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); @@ -235,72 +238,73 @@ public class MigrateDbEntitiesApplicationTest { private List prepareMocks(final String jsonFile) throws IOException, SQLException { final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile)); final ObjectMapper mapper = new ObjectMapper(); - final List list = mapper.readValue(json, new TypeReference>() {}); + final List list = mapper.readValue(json, new TypeReference>() { + }); for (final TypedField tf : list) { if (tf.getValue() == null) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); - break; - case "date": - Mockito.when(rs.getDate(tf.getField())).thenReturn(null); - break; - case "int": - Mockito.when(rs.getInt(tf.getField())).thenReturn(0); - break; - case "double": - Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); - break; - case "array": - Mockito.when(rs.getArray(tf.getField())).thenReturn(null); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(null); - break; + case "not_used": + break; + case "boolean": + Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); + break; + case "date": + Mockito.when(rs.getDate(tf.getField())).thenReturn(null); + break; + case "int": + Mockito.when(rs.getInt(tf.getField())).thenReturn(0); + break; + case "double": + Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); + break; + case "array": + Mockito.when(rs.getArray(tf.getField())).thenReturn(null); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(null); + break; } } else { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito - .when(rs.getBoolean(tf.getField())) - .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); - break; - case "date": - Mockito - .when(rs.getDate(tf.getField())) - .thenReturn(Date.valueOf(tf.getValue().toString())); - break; - case "int": - Mockito - .when(rs.getInt(tf.getField())) - .thenReturn(new Integer(tf.getValue().toString())); - break; - case "double": - Mockito - .when(rs.getDouble(tf.getField())) - .thenReturn(new Double(tf.getValue().toString())); - break; - case "array": - final Array arr = Mockito.mock(Array.class); - final String[] values = ((List) tf.getValue()) - .stream() - .filter(Objects::nonNull) - .map(o -> o.toString()) - .toArray(String[]::new); + case "not_used": + break; + case "boolean": + Mockito + .when(rs.getBoolean(tf.getField())) + .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); + break; + case "date": + Mockito + .when(rs.getDate(tf.getField())) + .thenReturn(Date.valueOf(tf.getValue().toString())); + break; + case "int": + Mockito + .when(rs.getInt(tf.getField())) + .thenReturn(new Integer(tf.getValue().toString())); + break; + case "double": + Mockito + .when(rs.getDouble(tf.getField())) + .thenReturn(new Double(tf.getValue().toString())); + break; + case "array": + final Array arr = Mockito.mock(Array.class); + final String[] values = ((List) tf.getValue()) + .stream() + .filter(Objects::nonNull) + .map(o -> o.toString()) + .toArray(String[]::new); - Mockito.when(arr.getArray()).thenReturn(values); - Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); - break; + Mockito.when(arr.getArray()).thenReturn(values); + Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); + break; } } } @@ -312,27 +316,27 @@ public class MigrateDbEntitiesApplicationTest { for (final TypedField tf : list) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); - break; - case "date": - Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); - break; - case "int": - Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); - break; - case "double": - Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); - break; - case "array": - Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); - break; - case "string": - default: - Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); - break; + case "not_used": + break; + case "boolean": + Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); + break; + case "date": + Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); + break; + case "int": + Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); + break; + case "double": + Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); + break; + case "array": + Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); + break; + case "string": + default: + Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); + break; } } } From 00a28c008079266c54d7ac07d0646bf80ff80c2d Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Mon, 23 Aug 2021 15:02:21 +0200 Subject: [PATCH 237/287] originalId was renamed to acronym --- .../dhp/oa/graph/dump/complete/CreateEntityTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index f881e6b30..b5f73da71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -97,7 +97,7 @@ public class CreateEntityTest { Assertions.assertEquals(12, riList.size()); riList.stream().forEach(c -> { - switch (c.getOriginalId()) { + switch (c.getAcronym()) { case "mes": Assertions .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY)); @@ -115,9 +115,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); - Assertions.assertTrue("mes".equals(c.getOriginalId())); + Assertions.assertTrue("mes".equals(c.getAcronym())); break; case "clarin": Assertions @@ -130,9 +130,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); - Assertions.assertTrue("clarin".equals(c.getOriginalId())); + Assertions.assertTrue("clarin".equals(c.getAcronym())); break; } // TODO add check for all the others Entities From 45898c71acea6ce2fc3b969dde4189083eb6b03a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 24 Aug 2021 15:20:04 +0200 Subject: [PATCH 238/287] fixed wrong doi in pubmed --- .../dhp/sx/graph/SparkResolveRelation.scala | 12 +- .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 28 +- .../sx/graph/bio/pubmed/BioScholixTest.scala | 13 +- .../sx/graph/scholix/ScholixGraphTest.scala | 11 + .../dnetlib/dhp/sx/graph/scholix/result.json | 508 ++++++++++++++++++ 5 files changed, 558 insertions(+), 14 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index b2fddec20..1b13b81c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -80,7 +80,7 @@ object SparkResolveRelation { } - private def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { + def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val id:String = (json \ "id").extract[String] @@ -90,7 +90,15 @@ object SparkResolveRelation { JField("qualifier", JObject(qualifier)) <- pids JField("classname", JString(pidType)) <- qualifier } yield (pidValue, pidType) - (id,result) + + val alternateIds: List[(String,String)] = for { + JObject(pids) <- json \\ "alternateIdentifier" + JField("value", JString(pidValue)) <- pids + JField("qualifier", JObject(qualifier)) <- pids + JField("classname", JString(pidType)) <- qualifier + } yield (pidValue, pidType) + + (id,result:::alternateIds) } private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, entityPath: String, workingPath: String) = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index 9a49deebc..377bd902d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed - +import java.util.regex.Pattern import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ import scala.collection.JavaConverters._ @@ -15,6 +15,20 @@ object PubMedToOaf { "doi" -> "https://dx.doi.org/" ) + def cleanDoi(doi:String):String = { + + val regex = "10.\\d{4,9}\\/[-._;()\\/:A-Z0-9]+$" + + + val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) + val matcher = pattern.matcher(doi) + + if (matcher.find) { + return matcher.group(0) + } + null + } + def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { @@ -60,8 +74,12 @@ object PubMedToOaf { var pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) if (pidList == null) return null + + var alternateIdentifier :StructuredProperty = null if (article.getDoi != null) { - pidList = pidList ::: List(OafMapperUtils.structuredProperty(article.getDoi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) + val normalizedPid = cleanDoi(article.getDoi) + if (normalizedPid!= null) + alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) } // If the article contains the typology Journal Article then we apply this type @@ -84,9 +102,9 @@ object PubMedToOaf { return result result.setDataInfo(dataInfo) i.setPid(pidList.asJava) + if (alternateIdentifier!= null) + i.setAlternateIdentifier(List(alternateIdentifier).asJava) result.setInstance(List(i).asJava) - - i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut) val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index 894c0a795..8e063db7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -1,9 +1,12 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF +import eu.dnetlib.dhp.sx.graph.bio.pubmed.PubMedToOaf.dataInfo import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -64,6 +67,9 @@ class BioScholixTest extends AbstractVocabularyTest{ assertEquals(10, r.size) assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p))) println(mapper.writeValueAsString(r.head)) + + + } @@ -179,13 +185,6 @@ class BioScholixTest extends AbstractVocabularyTest{ val result:List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) assertTrue(result.nonEmpty) - - - - - - - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index aaac2b7ee..5b7fbe1cf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, Ser import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary +import eu.dnetlib.dhp.sx.graph.SparkResolveRelation import eu.dnetlib.dhp.sx.graph.bio.pubmed.AbstractVocabularyTest import org.json4s import org.json4s.DefaultFormats @@ -30,6 +31,16 @@ class ScholixGraphTest extends AbstractVocabularyTest{ } + @Test + def testExtractPids():Unit = { + + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")).mkString + val res =SparkResolveRelation.extractPidsFromRecord(input) + assertNotNull(res) + assertTrue(res._2.size == 2) + + } + @Test def testOAFToSummary():Unit= { val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")).mkString diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json new file mode 100644 index 000000000..f14d5fedf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json @@ -0,0 +1,508 @@ +{ + "collectedfrom": [ + { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": null, + "id": "50|pmid________::cd23b96c02d937c971c1b56d6aa0bf4f", + "originalId": [ + "10025635" + ], + "pid": [ + { + "value": "10025635", + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofcollection": null, + "dateoftransformation": null, + "extraInfo": null, + "oaiprovenance": null, + "measures": null, + "author": [ + { + "fullname": "D J, Marcellin-Little", + "name": "D J", + "surname": "Marcellin-Little", + "rank": 1, + "pid": null, + "affiliation": null + }, + { + "fullname": "B A, DeYoung", + "name": "B A", + "surname": "DeYoung", + "rank": 2, + "pid": null, + "affiliation": null + }, + { + "fullname": "D H, Doyens", + "name": "D H", + "surname": "Doyens", + "rank": 3, + "pid": null, + "affiliation": null + }, + { + "fullname": "D J, DeYoung", + "name": "D J", + "surname": "DeYoung", + "rank": 4, + "pid": null, + "affiliation": null + } + ], + "resulttype": { + "classid": "dataset", + "classname": "dataset", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "language": null, + "country": null, + "subject": [ + { + "value": "Animals", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Arthroplasty, Replacement, Hip", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Dogs", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Follow-Up Studies", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Hip Joint", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Hip Prosthesis", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Osseointegration", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Prospective Studies", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Radiography", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Survival Analysis", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Treatment Outcome", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "title": [ + { + "value": "Canine uncemented porous-coated anatomic total hip arthroplasty: results of a long-term prospective evaluation of 50 consecutive cases.", + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "relevantdate": null, + "description": [ + { + "value": "To evaluate the long-term clinical and radiographic results of a canine uncemented porous-coated anatomic (PCA) total hip arthroplasty (THA).Prospective study of consecutive clinical patients using survival analysis.Forty-one dogs that underwent PCA THA; nine had bilateral PCA THA (50 prostheses).Gait observation, orthopedic examination, and radiographic assessment were conducted before THA, 6 months after THA, and yearly thereafter. A zonal analysis system was used to document osseous changes in the femur and the acetabulum. Acetabular cup and femoral stem subsidence and migration, femoral canal fill, and implant orientation were measured. Survival analysis of the procedure was conducted.Long-term follow-up was available for 37 dogs (46 prostheses). The median follow-up was 63 months. Limb function was normal for 37 limbs and abnormal for 9 limbs because of dislocation (n = 3), lumbosacral disease (n = 2), degenerative myelopathy (n = 1), autoimmune disease (n = 1), brain tumor (n = 1), or osteosarcoma of the femur (n = 1). All prosthetic stems and cups were fixed by bone ingrowth fixation. Osteolysis was not observed. Bone infarction occurred in five femoral canals (four dogs). The 6-year survival rate for the procedure was 87% (95% confidence interval, 72%-96%).Long-term fixation of the uncemented PCA acetabular cup and stem is successful in dogs, and long-term clinical function is excellent.", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1999-02-20", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "publisher": null, + "embargoenddate": null, + "source": null, + "fulltext": null, + "format": null, + "contributor": null, + "resourcetype": null, + "coverage": null, + "bestaccessright": null, + "context": null, + "externalReference": null, + "instance": [ + { + "license": null, + "accessright": null, + "instancetype": { + "classid": "0037", + "classname": "Clinical Trial", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "hostedby": null, + "url": [ + "https://pubmed.ncbi.nlm.nih.gov/10025635" + ], + "distributionlocation": null, + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central", + "dataInfo": null + }, + "pid": [ + { + "value": "10025635", + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "alternateIdentifier": [ + { + "value": "10.1053/jvet.1999.0010", + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1999-02-20", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargeamount": null, + "processingchargecurrency": null, + "refereed": null + } + ], + "storagedate": null, + "device": null, + "size": null, + "version": null, + "lastmetadataupdate": null, + "metadataversionnumber": null, + "geolocation": null +} From ccf4103a25ccfc29d4621190e1df821d399bc929 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 25 Aug 2021 10:07:58 +0200 Subject: [PATCH 239/287] keep the original url if the decoder fails for any reason --- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java | 2 +- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 5f9491029..b7afd3595 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -169,7 +169,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .map(s -> { try { return URLDecoder.decode(s, "UTF-8"); - } catch (UnsupportedEncodingException e) { + } catch (Throwable t) { return s; } }) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 440dbaf6c..194715295 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -168,7 +168,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected String trimAndDecodeUrl(String url) { try { return URLDecoder.decode(url.trim(), "UTF-8"); - } catch (UnsupportedEncodingException e) { + } catch (Throwable t) { return url; } } From e8b3cb9147d63380d64c4ed63e977f57fa42d161 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 30 Aug 2021 09:32:21 +0200 Subject: [PATCH 240/287] Implemented method to download delta updates in EBI Links --- .../datacite/AbstractRestClient.scala | 33 ++--- .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 6 +- .../sx/graph/ebi/SparkDownloadEBILinks.scala | 115 ++++++++++++++++++ .../dhp/sx/graph/ebi/ebi_download_update.json | 5 + .../ebi/update/oozie_app/config-default.xml | 68 +++++++++++ .../graph/ebi/update/oozie_app/workflow.xml | 59 +++++++++ 6 files changed, 269 insertions(+), 17 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 92a870e37..bae41b218 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -8,14 +8,15 @@ import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} import java.io.IOException -abstract class AbstractRestClient extends Iterator[String]{ + +abstract class AbstractRestClient extends Iterator[String] { var buffer: List[String] = List() - var current_index:Int = 0 + var current_index: Int = 0 var scroll_value: Option[String] = None - var complete:Boolean = false + var complete: Boolean = false def extractInfo(input: String): Unit @@ -23,13 +24,13 @@ abstract class AbstractRestClient extends Iterator[String]{ protected def getBufferData(): Unit - def doHTTPGETRequest(url:String): String = { + def doHTTPGETRequest(url: String): String = { val httpGet = new HttpGet(url) doHTTPRequest(httpGet) } - def doHTTPPOSTRequest(url:String, json:String): String = { + def doHTTPPOSTRequest(url: String, json: String): String = { val httpPost = new HttpPost(url) if (json != null) { val entity = new StringEntity(json) @@ -46,7 +47,7 @@ abstract class AbstractRestClient extends Iterator[String]{ override def next(): String = { - val next_item:String = buffer(current_index) + val next_item: String = buffer(current_index) current_index = current_index + 1 if (current_index == buffer.size) getBufferData() @@ -54,17 +55,16 @@ abstract class AbstractRestClient extends Iterator[String]{ } - - - private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={ + private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { val timeout = 60; // seconds val config = RequestConfig.custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) .setSocketTimeout(timeout * 1000).build() - val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() - var tries = 4 - while (tries > 0) { + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { println(s"requesting ${r.getURI}") try { val response = client.execute(r) @@ -78,10 +78,15 @@ abstract class AbstractRestClient extends Iterator[String]{ case e: Throwable => println(s"Error on requesting ${r.getURI}") e.printStackTrace() - tries-=1 + tries -= 1 } } "" - } + } finally { + if (client != null) + client.close() + } + } + getBufferData() } \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index 377bd902d..202eb7b14 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed -import java.util.regex.Pattern import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import java.util.regex.Pattern import scala.collection.JavaConverters._ object PubMedToOaf { @@ -17,7 +17,7 @@ object PubMedToOaf { def cleanDoi(doi:String):String = { - val regex = "10.\\d{4,9}\\/[-._;()\\/:A-Z0-9]+$" + val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala new file mode 100644 index 000000000..08e060459 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.sx.graph.ebi + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import org.apache.commons.io.IOUtils +import org.apache.http.client.config.RequestConfig +import org.apache.http.client.methods.{HttpGet, HttpUriRequest} +import org.apache.http.impl.client.HttpClientBuilder +import org.apache.spark.SparkConf +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions.max +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +object SparkDownloadEBILinks { + + + def createEBILinks(pmid:Long):EBILinkItem = { + + val res = requestLinks(pmid) + if (res!=null) + return EBILinkItem(pmid, res) + null + } + + + def requestLinks(PMID:Long):String = { + val r = new HttpGet(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 + } + } + "" + } finally { + if (client != null) + client.close() + } + + } + def main(args: Array[String]): Unit = { + + val log: Logger = LoggerFactory.getLogger(getClass) + val MAX_ITEM_PER_PARTITION = 20000 + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(SparkEBILinksToOaf.getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + import spark.implicits._ + + implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) + implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) + implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + val workingPath = parser.get("workingPath") + log.info(s"workingPath -> $workingPath") + + log.info("Getting max pubmedId where the links have been requested") + val links:Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] + val lastPMIDRequested =links.map(l => l.id).select(max("value")).first.getLong(0) + + log.info("Retrieving PMID to request links") + val pubmed = spark.read.load(s"$sourcePath/baseline_dataset").as[PMArticle] + pubmed.map(p => p.getPmid.toLong).where(s"value > $lastPMIDRequested").write.mode(SaveMode.Overwrite).save(s"$workingPath/id_to_request") + + val pmidToReq:Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] + + val total = pmidToReq.count() + + spark.createDataset(pmidToReq.rdd.repartition((total/MAX_ITEM_PER_PARTITION).toInt).map(pmid =>createEBILinks(pmid)).filter(l => l!= null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") + + val updates:Dataset[EBILinkItem] =spark.read.load(s"$workingPath/links_update").as[EBILinkItem] + + links.union(updates).groupByKey(_.id) + .reduceGroups{(x,y) => + if (x == null || x.links ==null) + y + if (y ==null || y.links ==null) + x + if (x.links.length > y.links.length) + x + else + y + }.map(_._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_final") + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json new file mode 100644 index 000000000..0ae19234a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the working path ", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml new file mode 100644 index 000000000..17cd6c9a3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml @@ -0,0 +1,68 @@ + + + + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + + + + + + + + + + + + + + + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml new file mode 100644 index 000000000..1b738caed --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml @@ -0,0 +1,59 @@ + + + + sourcePath + the Working Path + + + workingPath + the Working Path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + Incremental Download EBI Links + eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingPath} + --masteryarn + + + + + + + \ No newline at end of file From 3762b17f7b557035dbd680e313554ffb11dfcd54 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 31 Aug 2021 20:20:05 +0200 Subject: [PATCH 241/287] added VERSIOn and PART relationship and re-ordered according to my personal and obviously possibly biased ordering --- .../dhp/oa/provision/model/SortableRelationKey.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java index 463c15e9e..cf441a517 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java @@ -18,17 +18,18 @@ public class SortableRelationKey implements Comparable, Ser static { weights.put(ModelConstants.PARTICIPATION, 0); - weights.put(ModelConstants.OUTCOME, 1); weights.put(ModelConstants.AFFILIATION, 2); weights.put(ModelConstants.DEDUP, 3); weights.put(ModelConstants.PUBLICATION_DATASET, 4); - weights.put(ModelConstants.CITATION, 5); - weights.put(ModelConstants.SUPPLEMENT, 6); - weights.put(ModelConstants.REVIEW, 7); - weights.put(ModelConstants.RELATIONSHIP, 8); + weights.put(ModelConstants.SUPPLEMENT, 5); + weights.put(ModelConstants.REVIEW, 6); + weights.put(ModelConstants.RELATIONSHIP, 7); + weights.put(ModelConstants.PART, 8); weights.put(ModelConstants.PROVISION, 9); - weights.put(ModelConstants.SIMILARITY, 10); + weights.put(ModelConstants.VERSION, 10); + weights.put(ModelConstants.SIMILARITY, 11); + weights.put(ModelConstants.CITATION, 12); } private static final long serialVersionUID = 3232323; From 9f8a80deb739893d71eb72e396d964f12f549681 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 1 Sep 2021 14:16:27 +0200 Subject: [PATCH 242/287] fixed wrong import of unresolved relation in openaire --- .../dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml | 2 +- .../java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 021704f54..c6332ff7d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -70,7 +70,7 @@ --sourcePath${mainPath}/datacite_dump --targetPath${mainPath}/datacite_oaf --isLookupUrl${isLookupUrl} - --exportLinkstrue + --exportLinksfalse --masteryarn-cluster diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index ae899c3d8..fdf397ad7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,6 +131,7 @@ public class PrepareRelationsJob { Set relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) + .filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved"))) .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); From d4dadf6d77b102c6eec63386f291a3d37d5942b2 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 2 Sep 2021 14:21:24 +0200 Subject: [PATCH 243/287] reduced max number of PID in Relatedentity --- .../CreateRelatedEntitiesJob_phase1.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index b2abbc156..a33a45517 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -101,13 +101,22 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), Encoders .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) - .cache(); + .write() + .mode(SaveMode.Overwrite) + .save("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()); + + final Dataset> entities = spark + .read() + .load("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()) + .as( + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))); relsByTarget .joinWith(entities, entities.col("_1").equalTo(relsByTarget.col("_1")), "inner") @@ -140,7 +149,8 @@ public class CreateRelatedEntitiesJob_phase1 { re.setId(entity.getId()); re.setType(EntityType.fromClass(clazz).name()); - re.setPid(entity.getPid()); + if (entity.getPid() != null) + re.setPid(entity.getPid().stream().limit(400).collect(Collectors.toList())); re.setCollectedfrom(entity.getCollectedfrom()); switch (EntityType.fromClass(clazz)) { From 3c6fc2096ca0df92f0b83ce406ac6218af02a7a5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 7 Sep 2021 10:46:26 +0200 Subject: [PATCH 244/287] fix bug on oai iterator that skip record cleaned --- .../java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 3f767fd31..566c6b216 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -161,7 +161,7 @@ public class OaiIterator implements Iterator { report.put(e.getClass().getName(), e.getMessage()); final String cleaned = XmlCleaner.cleanAllEntities(xml); try { - doc = DocumentHelper.parseText(xml); + doc = DocumentHelper.parseText(cleaned); } catch (final DocumentException e1) { final String resumptionToken = extractResumptionToken(xml); if (resumptionToken == null) { From aed29156c78192ba4c2841fc6ae179c391f1b088 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 7 Sep 2021 19:05:46 +0200 Subject: [PATCH 245/287] changed behavior in transformation job, that doesn't fail at first error --- .../eu/dnetlib/dhp/transformation/TransformSparkJobNode.java | 4 +++- .../dhp/transformation/xslt/XSLTTransformationFunction.java | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index 4fe79bf76..ed867c7f2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*; import java.io.IOException; import java.util.Map; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -126,7 +127,8 @@ public class TransformSparkJobNode { JavaRDD mdstore = inputMDStore .javaRDD() .repartition(getRepartitionNumber(totalInput, rpt)) - .map((Function) x::call); + .map((Function) x::call) + .filter((Function) Objects::nonNull); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); log.info("Transformed item {}", ct.getProcessedItems().count()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index acf48ccc5..54192a7be 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -81,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction Date: Tue, 14 Sep 2021 17:26:15 +0200 Subject: [PATCH 246/287] other task executions go ahead if UnknownHostException happens on a single task --- .../orcid/SparkDownloadOrcidAuthors.java | 18 +++++++++++++++++- .../orcid/SparkDownloadOrcidWorks.java | 16 +++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 2b8e42bf6..8f0b3a094 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,8 +4,11 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -18,6 +21,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.util.LongAccumulator; @@ -78,6 +82,7 @@ public class SparkDownloadOrcidAuthors { LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); logger.info("Retrieving data from lamda sequence file"); JavaPairRDD lamdaFileRDD = sc @@ -107,7 +112,17 @@ public class SparkDownloadOrcidAuthors { httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + CloseableHttpResponse response = null; + try { + response = client.execute(httpGet); + } catch (UnknownHostException u) { + downloaded.setStatusCode(-1); + unknowHostAcc.add(1); + if (client != null) { + client.close(); + } + return downloaded.toTuple2(); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { @@ -171,6 +186,7 @@ public class SparkDownloadOrcidAuthors { logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); + logger.info("unknowHostAcc: {}", unknowHostAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index cab538783..457c79adb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,6 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -96,6 +98,7 @@ public class SparkDownloadOrcidWorks { LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -154,7 +157,17 @@ public class SparkDownloadOrcidWorks { httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + CloseableHttpResponse response = null; + try { + response = client.execute(httpGet); + } catch (UnknownHostException u) { + downloaded.setStatusCode(-1); + unknowHostAcc.add(1); + if (client != null) { + client.close(); + } + return downloaded.toTuple2(); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { @@ -219,6 +232,7 @@ public class SparkDownloadOrcidWorks { logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); + logger.info("unknowHostAcc: {}", unknowHostAcc.value()); }); } From 8b804e7fe1d1602d33bfb388a4e3d99c40acd22c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 14 Sep 2021 17:30:52 +0200 Subject: [PATCH 247/287] removed unused imports --- .../eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java | 3 --- .../eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java | 1 - 2 files changed, 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8f0b3a094..c0aa007e5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,11 +4,9 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.net.InetAddress; import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.Date; -import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -21,7 +19,6 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.util.LongAccumulator; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 457c79adb..63ba151f6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.net.InetAddress; import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; From ebf53a1616b256de90aa62255d5c4e2b13d34237 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:10:37 +0200 Subject: [PATCH 248/287] added cleaning for relation fields: subRelType & relClass according to dedicated vocabs --- .../dhp/oa/graph/clean/CleaningRuleMap.java | 31 ++++++++--- .../clean/GraphCleaningFunctionsTest.java | 55 ++++++++++++------- .../dnetlib/dhp/oa/graph/clean/relation.json | 10 ++++ .../dnetlib/dhp/oa/graph/clean/synonyms.txt | 12 +++- .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 39 ++++++++++++- pom.xml | 2 +- 6 files changed, 117 insertions(+), 32 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 95aa749b2..7a3583289 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -12,6 +12,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { @@ -24,17 +25,31 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer cleanQualifier(vocabularies, (Qualifier) o)); mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o)); - mapping.put(Country.class, o -> { - final Country c = (Country) o; - if (StringUtils.isBlank(c.getSchemeid())) { - c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); - c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); - } - cleanQualifier(vocabularies, c); - }); + mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); + mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); return mapping; } + private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType()); + r.setSubRelType(newValue.getClassid()); + } + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_RELCLASS)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_RELCLASS, r.getRelClass()); + r.setRelClass(newValue.getClassid()); + } + } + + private static void cleanCountry(VocabularyGroup vocabularies, Country o) { + final Country c = o; + if (StringUtils.isBlank(c.getSchemeid())) { + c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); + c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); + } + cleanQualifier(vocabularies, c); + } + private static void cleanQualifier(VocabularyGroup vocabularies, Q q) { if (vocabularies.vocabularyExists(q.getSchemeid())) { Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index edcd72ab4..42d9f226c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.Set; import java.util.stream.Stream; @@ -16,12 +17,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -29,7 +30,8 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class GraphCleaningFunctionsTest { - public static final ObjectMapper MAPPER = new ObjectMapper(); + public static final ObjectMapper MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Mock private ISLookUpService isLookUpService; @@ -49,6 +51,23 @@ public class GraphCleaningFunctionsTest { mapping = CleaningRuleMap.create(vocabularies); } + @Test + void testCleanRelations() throws Exception { + + List lines = IOUtils + .readLines(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/relation.json")); + for (String json : lines) { + Relation r_in = MAPPER.readValue(json, Relation.class); + assertNotNull(r_in); + + assertFalse(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_in.getRelClass())); + + Relation r_out = OafCleaner.apply(r_in, mapping); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + } + } + @Test void testCleaning() throws Exception { @@ -87,7 +106,7 @@ public class GraphCleaningFunctionsTest { p_out .getPid() .stream() - .map(p -> p.getQualifier()) + .map(StructuredProperty::getQualifier) .allMatch(q -> pidTerms.contains(q.getClassid()))); List poi = p_out.getInstance(); @@ -101,8 +120,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, poii.getPid().size()); assertTrue( - poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(poii.getAlternateIdentifier()); assertEquals(2, poii.getAlternateIdentifier().size()); @@ -111,16 +130,12 @@ public class GraphCleaningFunctionsTest { poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1007/s109090161569x")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); assertTrue( poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); @@ -142,8 +157,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, pcii.getPid().size()); assertTrue( - pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(pcii.getAlternateIdentifier()); assertEquals(1, pcii.getAlternateIdentifier().size()); @@ -151,9 +166,7 @@ public class GraphCleaningFunctionsTest { pcii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); getAuthorPids(p_cleaned).forEach(pid -> { System.out @@ -172,17 +185,17 @@ public class GraphCleaningFunctionsTest { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()) - .map(s -> s.getQualifier()); + .map(Author::getPid) + .flatMap(Collection::stream) + .map(StructuredProperty::getQualifier); } private Stream getAuthorPids(Result pub) { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()); + .map(Author::getPid) + .flatMap(Collection::stream); } private List vocs() throws IOException { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json new file mode 100644 index 000000000..97764de00 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -0,0 +1,10 @@ +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 729296522..79dc7cd2d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1231,4 +1231,14 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-学術雑誌論文(査 dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有り) dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り) dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り) -dnet:review_levels @=@ 0001 @=@ 査読論文 \ No newline at end of file +dnet:review_levels @=@ 0001 @=@ 査読論文 +dnet:relation_relClass @=@ Cites @=@ cites +dnet:relation_relClass @=@ IsCitedBy @=@ isCitedBy +dnet:relation_relClass @=@ HasPart @=@ hasPart +dnet:relation_relClass @=@ IsPartOf @=@ isPartOf +dnet:relation_relClass @=@ IsReviewedBy @=@ isReviewedBy +dnet:relation_relClass @=@ Reviews @=@ reviews +dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo +dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy +dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo +dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c..bb1e5fbf9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1079,4 +1079,41 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/DATASET/IS_SUPPLEMENTED dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID @=@ An Open Researcher and Contributor ID (ORCID) that can be associated to an author of your publications dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed -dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed \ No newline at end of file +dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Cites @=@ Cites +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCitedBy @=@ IsCitedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasPart @=@ HasPart +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPartOf @=@ IsPartOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsReviewedBy @=@ IsReviewedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Reviews @=@ Reviews +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementTo @=@ IsSupplementTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementedBy @=@ IsSupplementedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsRelatedTo @=@ IsRelatedTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Compiles @=@ Compiles +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Continues @=@ Continues +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Documents @=@ Documents +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasAmongTopNSimilarDocuments @=@ HasAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasVersion @=@ HasVersion +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsAmongTopNSimilarDocuments @=@ IsAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCompiledBy @=@ IsCompiledBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsContinuedBy @=@ IsContinuedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDerivedFrom @=@ IsDerivedFrom +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDocumentedBy @=@ IsDocumentedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsNewVersionOf @=@ IsNewVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsObsoletedBy @=@ IsObsoletedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsOriginalFormOf @=@ IsOriginalFormOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPreviousVersionOf @=@ IsPreviousVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSourceOf @=@ IsSourceOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsVariantFormOf @=@ IsVariantFormOf +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ affiliation @=@ affiliation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ citation @=@ citation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ dedup @=@ dedup +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ outcome @=@ outcome +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ part @=@ part +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ participation @=@ participation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ provision @=@ provision +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ relationship @=@ relationship +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version \ No newline at end of file diff --git a/pom.xml b/pom.xml index 99525ef85..61b0ad873 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.17] + [2.7.18] [4.0.3] [6.0.5] [3.1.6] From 663b1556d7adc0b9f8a94f75d99c299e216b08a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:40:25 +0200 Subject: [PATCH 249/287] manually integrating PR#140 https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/140 --- .../main/java/eu/dnetlib/dhp/common/Constants.java | 6 ++++++ .../dhp/common/collection/HttpConnector2.java | 14 +++++++++++++- .../collection/plugin/oai/OaiCollectorPlugin.java | 4 ++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 8fab94e92..a62a0ac79 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -52,4 +52,10 @@ public class Constants { public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + // IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages + // see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset"; + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 724f5f0e1..dd46ab1f4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * - * @author jochen, michele, andrea, alessia, claudio + * @author jochen, michele, andrea, alessia, claudio, andreas */ public class HttpConnector2 { @@ -112,6 +113,17 @@ public class HttpConnector2 { } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); + String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + + if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) { + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } else { + backoffAndSleep(1000); + } + } + if (is2xx(urlConn.getResponseCode())) { input = urlConn.getInputStream(); responseType = urlConn.getContentType(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 878e286e0..2d04b2574 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { } if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate); } if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate); } final Iterator> iters = sets From e9ccdf853f128a144fb193546553fa1bdcd10399 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 15 Sep 2021 18:44:54 +0200 Subject: [PATCH 250/287] related to https://code-repo.d4science.org/D-Net/dnet-hadoop/issues/132 --- .../java/eu/dnetlib/dhp/PropagationConstant.java | 15 ++++++++------- .../SparkOrcidToResultFromSemRelJob.java | 5 +++-- ...SparkResultToCommunityFromOrganizationJob.java | 4 +++- .../SparkResultToCommunityThroughSemRelJob.java | 4 +++- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0b4a80b2d..0d7c74475 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -68,27 +68,28 @@ public class PropagationConstant { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, - PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME)); + PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name) { + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) { DataInfo di = new DataInfo(); di.setInferred(true); di.setDeletedbyinference(false); di.setTrust("0.85"); di.setInferenceprovenance(inference_provenance); - di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name)); + di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema)); return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); - pa.setSchemeid(ModelConstants.DNET_PID_TYPES); - pa.setSchemename(ModelConstants.DNET_PID_TYPES); + pa.setSchemeid(qualifierSchema); + pa.setSchemename(qualifierSchema); return pa; } @@ -107,7 +108,7 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name)); + r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 40faef7f3..68949b900 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,13 +173,14 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index cb80a90ca..1289ff644 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -128,7 +129,8 @@ public class SparkResultToCommunityFromOrganizationJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 3690351fb..7f76ead94 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -124,7 +125,8 @@ public class SparkResultToCommunityThroughSemRelJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; From 853333bdde97db3d7ddc4c82fbe6ab5fa486ce83 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 20 Sep 2021 16:21:47 +0200 Subject: [PATCH 251/287] implementation of the whitelist for similarity relations --- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 151 +++ .../dhp/oa/dedup/scan/oozie_app/workflow.xml | 32 + .../oa/dedup/whitelistSimRels_parameters.json | 38 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 1181 +++++++++-------- .../dnetlib/dhp/dedup/whitelist.simrels.txt | 2 + 5 files changed, 856 insertions(+), 548 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java new file mode 100644 index 000000000..fa7d33570 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -0,0 +1,151 @@ +package eu.dnetlib.dhp.oa.dedup; + +import java.io.IOException; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.dedup.model.Block; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.model.MapDocument; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; +import scala.Tuple3; + +public class SparkWhitelistSimRels extends AbstractSparkAction { + + private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); + + private static final String WHITELIST_SEPARATOR = "####"; + + public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { + super(parser, spark); + } + + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + parser.parseArgument(args); + + SparkConf conf = new SparkConf(); + new SparkWhitelistSimRels(parser, getSparkSession(conf)) + .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); + } + + @Override + public void run(ISLookUpService isLookUpService) + throws DocumentException, IOException, ISLookUpException, SAXException { + + // read oozie parameters + final String graphBasePath = parser.get("graphBasePath"); + final String isLookUpUrl = parser.get("isLookUpUrl"); + final String actionSetId = parser.get("actionSetId"); + final String workingPath = parser.get("workingPath"); + final int numPartitions = Optional + .ofNullable(parser.get("numPartitions")) + .map(Integer::valueOf) + .orElse(NUM_PARTITIONS); + final String whiteListPath = parser.get("whiteListPath"); + + log.info("numPartitions: '{}'", numPartitions); + log.info("graphBasePath: '{}'", graphBasePath); + log.info("isLookUpUrl: '{}'", isLookUpUrl); + log.info("actionSetId: '{}'", actionSetId); + log.info("workingPath: '{}'", workingPath); + log.info("whiteListPath: '{}'", whiteListPath); + + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + //file format: source####target + Dataset> whiteListRels = spark.createDataset(sc + .textFile(whiteListPath) + //check if the line is in the correct format: id1####id2 + .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) + .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + // for each dedup configuration + for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { + + final String entity = dedupConf.getWf().getEntityType(); + final String subEntity = dedupConf.getWf().getSubEntityValue(); + log.info("Adding whitelist simrels for: '{}'", subEntity); + + final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); + + Dataset> entities = spark.createDataset(sc + .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .repartition(numPartitions) + .mapToPair( + (PairFunction) s -> { + MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); + return new Tuple2<>(d.getIdentifier(), "present"); + }) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset> whiteListRels1 = whiteListRels + .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") + .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset> whiteListRels2 = whiteListRels1 + .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") + .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset whiteListSimRels = whiteListRels2 + .map((MapFunction, Relation>) + r -> createSimRel(r._1(), r._2(), entity), + Encoders.bean(Relation.class) + ); + + saveParquet(whiteListSimRels, outputPath, SaveMode.Append); + } + } + + private Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass("isSimilarTo"); + r.setDataInfo(new DataInfo()); + + switch (entity) { + case "result": + r.setRelType("resultResult"); + break; + case "organization": + r.setRelType("organizationOrganization"); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml index 342d83e8e..02fdd8431 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml @@ -20,6 +20,10 @@ workingPath path for the working directory + + whiteListPath + path for the whitelist of similarity relations + dedupGraphPath path for the output graph @@ -130,6 +134,34 @@ --workingPath${workingPath} --numPartitions8000 + + + + + + + yarn + cluster + Add Whitelist Similarity Relations + eu.dnetlib.dhp.oa.dedup.SparkWhitelistSimRels + dhp-dedup-openaire-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --graphBasePath${graphBasePath} + --isLookUpUrl${isLookUpUrl} + --actionSetId${actionSetId} + --workingPath${workingPath} + --whiteListPath${whiteListPath} + --numPartitions8000 + diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json new file mode 100644 index 000000000..0a5cad7c4 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName": "la", + "paramLongName": "isLookUpUrl", + "paramDescription": "address for the LookUp", + "paramRequired": true + }, + { + "paramName": "asi", + "paramLongName": "actionSetId", + "paramDescription": "action set identifier (name of the orchestrator)", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "graphBasePath", + "paramDescription": "the base path of the raw graph", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingPath", + "paramDescription": "path of the working directory", + "paramRequired": true + }, + { + "paramName": "np", + "paramLongName": "numPartitions", + "paramDescription": "number of partitions for the similarity relations intermediate phases", + "paramRequired": false + }, + { + "paramName": "wl", + "paramLongName": "whiteListPath", + "paramDescription": "whitelist file path for the addition of custom simrels", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 2f992bd78..fa03f93a6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -5,13 +5,16 @@ import static java.nio.file.Files.createTempDirectory; import static org.apache.spark.sql.functions.count; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.lenient; import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -45,552 +48,634 @@ import scala.Tuple2; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class SparkDedupTest implements Serializable { - @Mock(serializable = true) - ISLookUpService isLookUpService; - - private static SparkSession spark; - private static JavaSparkContext jsc; - - private static String testGraphBasePath; - private static String testOutputBasePath; - private static String testDedupGraphBasePath; - private static final String testActionSetId = "test-orchestrator"; - - @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { - - testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) - .toFile() - .getAbsolutePath(); - testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - - final SparkConf conf = new SparkConf(); - conf.set("spark.sql.shuffle.partitions", "200"); - spark = SparkSession - .builder() - .appName(SparkDedupTest.class.getSimpleName()) - .master("local[*]") - .config(conf) - .getOrCreate(); - - jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - } - - @BeforeEach - public void setUp() throws IOException, ISLookUpException { - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); - } - - @Test - @Order(1) - void createSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); - - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50" - }); - - new SparkCreateSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(336, sw_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - } - - @Test - @Order(2) - void cutMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[] { - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath, - "-cc", - "3" - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - assertEquals(0, orgs_mergerel); - assertEquals(0, pubs_mergerel); - assertEquals(0, sw_mergerel); - assertEquals(0, ds_mergerel); - assertEquals(0, orp_mergerel); - - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); - FileUtils - .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); - } - - @Test - @Order(3) - void createMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[] { - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .count(); - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .count(); - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .count(); - - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); - - } - - @Test - @Order(4) - void createDedupRecordTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateDedupRecord.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); - parser - .parseArgument( - new String[] { - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateDedupRecord(parser, spark).run(isLookUpService); - - long orgs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") - .count(); - long pubs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .count(); - long sw_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") - .count(); - long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); - long orp_deduprecord = jsc - .textFile( - testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") - .count(); - - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); - assertEquals(51, sw_deduprecord); - assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); - } - - @Test - @Order(5) - void updateEntityTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkUpdateEntity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkUpdateEntity(parser, spark).run(isLookUpService); - - long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); - long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); - long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); - long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); - long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); - long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); - long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); - - long mergedOrgs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedPubs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedSw = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedDs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedOrp = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - assertEquals(896, publications); - assertEquals(838, organizations); - assertEquals(100, projects); - assertEquals(100, datasource); - assertEquals(200, softwares); - assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); - - long deletedOrgs = jsc - .textFile(testDedupGraphBasePath + "/organization") - .filter(this::isDeletedByInference) - .count(); - - long deletedPubs = jsc - .textFile(testDedupGraphBasePath + "/publication") - .filter(this::isDeletedByInference) - .count(); - - long deletedSw = jsc - .textFile(testDedupGraphBasePath + "/software") - .filter(this::isDeletedByInference) - .count(); - - long deletedDs = jsc - .textFile(testDedupGraphBasePath + "/dataset") - .filter(this::isDeletedByInference) - .count(); - - long deletedOrp = jsc - .textFile(testDedupGraphBasePath + "/otherresearchproduct") - .filter(this::isDeletedByInference) - .count(); - - assertEquals(mergedOrgs, deletedOrgs); - assertEquals(mergedPubs, deletedPubs); - assertEquals(mergedSw, deletedSw); - assertEquals(mergedDs, deletedDs); - assertEquals(mergedOrp, deletedOrp); - } - - @Test - @Order(6) - void propagateRelationTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkPropagateRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkPropagateRelation(parser, spark).run(isLookUpService); - - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - - assertEquals(4860, relations); - - // check deletedbyinference - final Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) - .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels - .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); - - JavaRDD toCheck = jsc - .textFile(testDedupGraphBasePath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); - - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); - - assertEquals(updated, deletedbyinference); - } - - @Test - @Order(7) - void testRelations() throws Exception { - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); - } - - private void testUniqueness(String path, int expected_total, int expected_unique) { - Dataset rel = spark - .read() - .textFile(getClass().getResource(path).getPath()) - .map( - (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), - Encoders.bean(Relation.class)); - - assertEquals(expected_total, rel.count()); - assertEquals(expected_unique, rel.distinct().count()); - } - - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - return s.contains("\"deletedbyinference\":true"); - } + @Mock(serializable = true) + ISLookUpService isLookUpService; + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; + private static String whitelistPath; + private static List whiteList; + + private static String WHITELIST_SEPARATOR = "####"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + whitelistPath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) + .toFile() + .getAbsolutePath(); + whiteList = IOUtils.readLines(new FileReader(whitelistPath)); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "200"); + spark = SparkSession + .builder() + .appName(SparkDedupTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); + } + + @Test + @Order(1) + void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(336, sw_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + } + + @Test + @Order(2) + void whitelistSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkWhitelistSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50", + "-wl", whitelistPath + }); + + new SparkWhitelistSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + //entities simrels supposed to be equal to the number of previous step (no rels in whitelist) + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + + //entities simrels to be different from the number of previous step (new simrels in the whitelist) + Dataset sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); + + //check if the first relation in the whitelist exists + assertTrue(sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter(rel -> + rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])).count() > 0); + //check if the second relation in the whitelist exists + assertTrue(sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter(rel -> + rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])).count() > 0); + + assertEquals(338, sw_simrel.count()); + + } + + @Test + @Order(3) + void cutMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-cc", + "3" + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + assertEquals(0, orgs_mergerel); + assertEquals(0, pubs_mergerel); + assertEquals(0, sw_mergerel); + assertEquals(0, ds_mergerel); + assertEquals(0, orp_mergerel); + + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); + FileUtils + .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); + } + + @Test + @Order(4) + void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + assertEquals(1272, orgs_mergerel); + assertEquals(1438, pubs_mergerel); + assertEquals(286, sw_mergerel); + assertEquals(472, ds_mergerel); + assertEquals(718, orp_mergerel); + + } + + @Test + @Order(5) + void createDedupRecordTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateDedupRecord.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); + parser + .parseArgument( + new String[]{ + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateDedupRecord(parser, spark).run(isLookUpService); + + long orgs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") + .count(); + long pubs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .count(); + long sw_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") + .count(); + long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); + long orp_deduprecord = jsc + .textFile( + testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") + .count(); + + assertEquals(85, orgs_deduprecord); + assertEquals(65, pubs_deduprecord); + assertEquals(49, sw_deduprecord); + assertEquals(97, ds_deduprecord); + assertEquals(89, orp_deduprecord); + } + + @Test + @Order(6) + void updateEntityTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateEntity.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkUpdateEntity(parser, spark).run(isLookUpService); + + long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); + long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); + long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); + long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); + long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); + long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); + + long mergedOrgs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedPubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedSw = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedDs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedOrp = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + assertEquals(896, publications); + assertEquals(838, organizations); + assertEquals(100, projects); + assertEquals(100, datasource); + assertEquals(198, softwares); + assertEquals(389, dataset); + assertEquals(517, otherresearchproduct); + + long deletedOrgs = jsc + .textFile(testDedupGraphBasePath + "/organization") + .filter(this::isDeletedByInference) + .count(); + + long deletedPubs = jsc + .textFile(testDedupGraphBasePath + "/publication") + .filter(this::isDeletedByInference) + .count(); + + long deletedSw = jsc + .textFile(testDedupGraphBasePath + "/software") + .filter(this::isDeletedByInference) + .count(); + + long deletedDs = jsc + .textFile(testDedupGraphBasePath + "/dataset") + .filter(this::isDeletedByInference) + .count(); + + long deletedOrp = jsc + .textFile(testDedupGraphBasePath + "/otherresearchproduct") + .filter(this::isDeletedByInference) + .count(); + + assertEquals(mergedOrgs, deletedOrgs); + assertEquals(mergedPubs, deletedPubs); + assertEquals(mergedSw, deletedSw); + assertEquals(mergedDs, deletedDs); + assertEquals(mergedOrp, deletedOrp); + } + + @Test + @Order(7) + void propagateRelationTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkPropagateRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkPropagateRelation(parser, spark).run(isLookUpService); + + long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + + assertEquals(4860, relations); + + // check deletedbyinference + final Dataset mergeRels = spark + .read() + .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) + .as(Encoders.bean(Relation.class)); + final JavaPairRDD mergedIds = mergeRels + .where("relClass == 'merges'") + .select(mergeRels.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2(r.getString(0), "d")); + + JavaRDD toCheck = jsc + .textFile(testDedupGraphBasePath + "/relation") + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()) + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()); + + long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); + long updated = toCheck.count(); + + assertEquals(updated, deletedbyinference); + } + + @Test + @Order(8) + void testRelations() throws Exception { + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); + } + + private void testUniqueness(String path, int expected_total, int expected_unique) { + Dataset rel = spark + .read() + .textFile(getClass().getResource(path).getPath()) + .map( + (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), + Encoders.bean(Relation.class)); + + assertEquals(expected_total, rel.count()); + assertEquals(expected_unique, rel.distinct().count()); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + + public boolean isDeletedByInference(String s) { + return s.contains("\"deletedbyinference\":true"); + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt new file mode 100644 index 000000000..862ca466d --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt @@ -0,0 +1,2 @@ +50|r37b0ad08687::f645b9729d1e1025a72c57883f0f2cac####50|r37b0ad08687::4c55b436743b5c49fa32cd582fd9e1aa +50|datacite____::a90f49f9fde5393c00633bea6e4e374a####50|datacite____::5f55cdee77303ba8a2bf9996c32a330c \ No newline at end of file From 92a63f78fe29bad5162af1a7964710a8d721ccf7 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 20 Sep 2021 18:25:00 +0200 Subject: [PATCH 252/287] multiple download attempts handling if a connection to orcid server fails --- .../orcid/SparkDownloadOrcidAuthors.java | 103 +++---- .../orcid/SparkDownloadOrcidWorks.java | 97 +++---- .../doiboost/orcid/util/DownloadsReport.java | 10 + .../util/MultiAttemptsHttpConnector.java | 272 ++++++++++++++++++ .../doiboost/orcid/OrcidClientTest.java | 92 +++++- 5 files changed, 439 insertions(+), 135 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index c0aa007e5..c549beb03 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -13,10 +12,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -26,8 +21,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import scala.Tuple2; public class SparkDownloadOrcidAuthors { @@ -73,17 +72,12 @@ public class SparkDownloadOrcidAuthors { LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records"); LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("to_download_records"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); - LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); - LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); - LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); - LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); - LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); - LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); + LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); - logger.info("Retrieving data from lamda sequence file"); + String lambdaFilePath = workingPath + lambdaFileName; + logger.info("Retrieving data from lamda sequence file: " + lambdaFilePath); JavaPairRDD lamdaFileRDD = sc - .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); + .sequenceFile(lambdaFilePath, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); logger.info("Data retrieved: {}", lamdaFileRDDCount); @@ -104,57 +98,44 @@ public class SparkDownloadOrcidAuthors { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastModifiedDate); - CloseableHttpClient client = HttpClients.createDefault(); - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + final HttpClientParams clientParams = new HttpClientParams(); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken(token); + String apiUrl = "https://api.orcid.org/v3.0/" + orcidId + "/record"; + DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = null; + boolean downloadCompleted = false; + String record = ""; try { - response = client.execute(httpGet); - } catch (UnknownHostException u) { - downloaded.setStatusCode(-1); - unknowHostAcc.add(1); - if (client != null) { - client.close(); + record = httpConnector.getInputSource(apiUrl, report); + downloadCompleted = true; + } catch (CollectorException ce) { + if (!report.isEmpty()) { + int errCode = report.keySet().stream().findFirst().get(); + report.forEach((k, v) -> { + logger.error(k + " " + v); + }); + downloaded.setStatusCode(errCode); + } else { + downloaded.setStatusCode(-4); } - return downloaded.toTuple2(); + errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { Thread.sleep(1000 - reqTime); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - break; - case 404: - errorHTTP404Acc.add(1); - break; - case 409: - errorHTTP409Acc.add(1); - break; - case 503: - errorHTTP503Acc.add(1); - break; - case 525: - errorHTTP525Acc.add(1); - break; - default: - errorHTTPGenericAcc.add(1); - } - return downloaded.toTuple2(); + if (downloadCompleted) { + downloaded.setStatusCode(200); + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(record)); } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - client.close(); return downloaded.toTuple2(); }; @@ -165,27 +146,17 @@ public class SparkDownloadOrcidAuthors { long authorsModifiedCount = authorsModifiedRDD.count(); logger.info("Authors modified count: {}", authorsModifiedCount); - logger.info("Start downloading ..."); - final JavaPairRDD pairRDD = authorsModifiedRDD .repartition(100) .map(downloadRecordFn) .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); - saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); - logger.info("errorHTTP404Acc: {}", errorHTTP404Acc.value()); - logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); - logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); - logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); - logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); - logger.info("unknowHostAcc: {}", unknowHostAcc.value()); + logger.info("errorsAcc: {}", errorsAcc.value()); }); - } private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 63ba151f6..0569bacfd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -12,10 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -29,8 +24,12 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import scala.Tuple2; @@ -91,13 +90,7 @@ public class SparkDownloadOrcidWorks { .sparkContext() .longAccumulator("error_parsing_xml_found"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); - LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); - LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); - LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); - LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); - LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); - LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); + LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -151,61 +144,44 @@ public class SparkDownloadOrcidWorks { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastUpdateValue); - CloseableHttpClient client = HttpClients.createDefault(); - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + relativeWorkUrl); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + final HttpClientParams clientParams = new HttpClientParams(); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken(token); + String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorkUrl; + DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = null; + boolean downloadCompleted = false; + String record = ""; try { - response = client.execute(httpGet); - } catch (UnknownHostException u) { - downloaded.setStatusCode(-1); - unknowHostAcc.add(1); - if (client != null) { - client.close(); + record = httpConnector.getInputSource(apiUrl, report); + downloadCompleted = true; + } catch (CollectorException ce) { + if (!report.isEmpty()) { + int errCode = report.keySet().stream().findFirst().get(); + report.forEach((k, v) -> { + logger.error(k + " " + v); + }); + downloaded.setStatusCode(errCode); + } else { + downloaded.setStatusCode(-4); } - return downloaded.toTuple2(); + errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { Thread.sleep(1000 - reqTime); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - break; - case 404: - errorHTTP404Acc.add(1); - break; - case 409: - errorHTTP409Acc.add(1); - break; - case 503: - errorHTTP503Acc.add(1); - break; - case 525: - errorHTTP525Acc.add(1); - break; - default: - errorHTTPGenericAcc.add(1); - logger - .info( - "Downloading {} status code: {}", orcidId, - response.getStatusLine().getStatusCode()); - } - return downloaded.toTuple2(); + if (downloadCompleted) { + downloaded.setStatusCode(200); + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(record)); } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - client.close(); return downloaded.toTuple2(); }; @@ -226,12 +202,7 @@ public class SparkDownloadOrcidWorks { logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); - logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); - logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); - logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); - logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); - logger.info("unknowHostAcc: {}", unknowHostAcc.value()); + logger.info("errorsAcc: {}", errorsAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java new file mode 100644 index 000000000..b06b0af90 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java @@ -0,0 +1,10 @@ + +package eu.dnetlib.doiboost.orcid.util; + +import java.util.LinkedHashMap; + +public class DownloadsReport extends LinkedHashMap { + + public DownloadsReport() { + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java new file mode 100644 index 000000000..5ef6efa26 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java @@ -0,0 +1,272 @@ + +package eu.dnetlib.doiboost.orcid.util; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +import java.io.IOException; +import java.io.InputStream; +import java.net.*; +import java.util.List; +import java.util.Map; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.http.HttpHeaders; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; + +/** + * Derived from eu.dnetlib.dhp.common.collection.HttpConnector2 with custom report and Bearer auth + * + * @author enrico + */ +public class MultiAttemptsHttpConnector { + + private static final Logger log = LoggerFactory.getLogger(MultiAttemptsHttpConnector.class); + + private HttpClientParams clientParams; + + private String responseType = null; + + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + + private String authToken = ""; + private String acceptHeaderValue = ""; + private String authMethod = ""; + public final static String BEARER = "BEARER"; + + public MultiAttemptsHttpConnector() { + this(new HttpClientParams()); + } + + public MultiAttemptsHttpConnector(HttpClientParams clientParams) { + this.clientParams = clientParams; + CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); + } + + /** + * Given the URL returns the content via HTTP GET + * + * @param requestUrl the URL + * @param report the list of errors + * @return the content of the downloaded resource + * @throws CollectorException when retrying more than maxNumberOfRetry times + */ + public String getInputSource(final String requestUrl, DownloadsReport report) + throws CollectorException { + return attemptDownloadAsString(requestUrl, 1, report); + } + + private String attemptDownloadAsString(final String requestUrl, final int retryNumber, + final DownloadsReport report) throws CollectorException { + + try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) { + return IOUtils.toString(s); + } catch (IOException e) { + log.error(e.getMessage(), e); + throw new CollectorException(e); + } + } + + private InputStream attemptDownload(final String requestUrl, final int retryNumber, + final DownloadsReport report) throws CollectorException, IOException { + + if (retryNumber > getClientParams().getMaxNumberOfRetry()) { + final String msg = String + .format( + "Max number of retries (%s/%s) exceeded, failing.", + retryNumber, getClientParams().getMaxNumberOfRetry()); + log.error(msg); + throw new CollectorException(msg); + } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + + InputStream input = null; + + try { + if (getClientParams().getRequestDelay() > 0) { + backoffAndSleep(getClientParams().getRequestDelay()); + } + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); + urlConn.setInstanceFollowRedirects(false); + urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); + urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000); + urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent); + + if (!getAcceptHeaderValue().isEmpty()) { + urlConn.addRequestProperty(HttpHeaders.ACCEPT, getAcceptHeaderValue()); + } + if (!getAuthToken().isEmpty() && getAuthMethod().equals(BEARER)) { + urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", getAuthToken())); + } + + if (log.isDebugEnabled()) { + logHeaderFields(urlConn); + } + + int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + if (is2xx(urlConn.getResponseCode())) { + input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + return input; + } + if (is3xx(urlConn.getResponseCode())) { + // REDIRECTS + final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); + log.info("The requested url has been moved to {}", newUrl); + report + .put( + urlConn.getResponseCode(), + String.format("Moved to: %s", newUrl)); + urlConn.disconnect(); + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } + return attemptDownload(newUrl, retryNumber + 1, report); + } + if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) { + switch (urlConn.getResponseCode()) { + case HttpURLConnection.HTTP_NOT_FOUND: + case HttpURLConnection.HTTP_BAD_GATEWAY: + case HttpURLConnection.HTTP_UNAVAILABLE: + case HttpURLConnection.HTTP_GATEWAY_TIMEOUT: + if (retryAfter > 0) { + log + .warn( + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); + backoffAndSleep(retryAfter * 1000); + } else { + log + .warn( + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); + backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); + } + report.put(urlConn.getResponseCode(), requestUrl); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + default: + report + .put( + urlConn.getResponseCode(), + String + .format( + "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + throw new CollectorException(urlConn.getResponseCode() + " error " + report); + } + } + throw new CollectorException( + String + .format( + "Unexpected status code: %s errors: %s", urlConn.getResponseCode(), + MAPPER.writeValueAsString(report))); + } catch (MalformedURLException | UnknownHostException e) { + log.error(e.getMessage(), e); + report.put(-2, e.getMessage()); + throw new CollectorException(e.getMessage(), e); + } catch (SocketTimeoutException | SocketException e) { + log.error(e.getMessage(), e); + report.put(-3, e.getMessage()); + backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); + return attemptDownload(requestUrl, retryNumber + 1, report); + } + } + + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { + log.debug("StatusCode: {}", urlConn.getResponseMessage()); + + for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { + if (e.getKey() != null) { + for (String v : e.getValue()) { + log.debug(" key: {} - value: {}", e.getKey(), v); + } + } + } + } + + private void backoffAndSleep(int sleepTimeMs) throws CollectorException { + log.info("I'm going to sleep for {}ms", sleepTimeMs); + try { + Thread.sleep(sleepTimeMs); + } catch (InterruptedException e) { + log.error(e.getMessage(), e); + throw new CollectorException(e); + } + } + + private int obtainRetryAfter(final Map> headerMap) { + for (String key : headerMap.keySet()) { + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) + && NumberUtils.isCreatable(headerMap.get(key).get(0))) { + return Integer.parseInt(headerMap.get(key).get(0)) + 10; + } + } + return -1; + } + + private String obtainNewLocation(final Map> headerMap) throws CollectorException { + for (String key : headerMap.keySet()) { + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) { + return headerMap.get(key).get(0); + } + } + throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING"); + } + + private boolean is2xx(final int statusCode) { + return statusCode >= 200 && statusCode <= 299; + } + + private boolean is4xx(final int statusCode) { + return statusCode >= 400 && statusCode <= 499; + } + + private boolean is3xx(final int statusCode) { + return statusCode >= 300 && statusCode <= 399; + } + + private boolean is5xx(final int statusCode) { + return statusCode >= 500 && statusCode <= 599; + } + + public String getResponseType() { + return responseType; + } + + public HttpClientParams getClientParams() { + return clientParams; + } + + public void setClientParams(HttpClientParams clientParams) { + this.clientParams = clientParams; + } + + public void setAuthToken(String authToken) { + this.authToken = authToken; + } + + private String getAuthToken() { + return authToken; + } + + public String getAcceptHeaderValue() { + return acceptHeaderValue; + } + + public void setAcceptHeaderValue(String acceptHeaderValue) { + this.acceptHeaderValue = acceptHeaderValue; + } + + public String getAuthMethod() { + return authMethod; + } + + public void setAuthMethod(String authMethod) { + this.authMethod = authMethod; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 9ea7c6959..d4079058e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,13 +1,11 @@ package eu.dnetlib.doiboost.orcid; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -17,7 +15,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.Lists; -import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -28,8 +25,11 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParserTest; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidClientTest { @@ -49,7 +49,7 @@ public class OrcidClientTest { @BeforeAll private static void setUp() throws IOException { - testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName()); + testPath = Files.createTempDirectory(OrcidClientTest.class.getName()); System.out.println("using test path: " + testPath); } @@ -349,4 +349,84 @@ public class OrcidClientTest { final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); } + + @Test + void downloadUnknownHostExceptionTest() throws Exception { + logToFile(testPath, "downloadUnknownHostExceptionTest"); + final String orcid = "0000-0001-7291-3210"; + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String wrongApiUrl = "https://api.orcid_UNKNOWN.org/v3.0/" + orcid + "/" + REQUEST_TYPE_RECORD; + String url = "UNKNOWN"; + DownloadsReport report = new DownloadsReport(); + try { + httpConnector.getInputSource(wrongApiUrl, report); + } catch (CollectorException ce) { + logToFile(testPath, "CollectorException downloading: " + ce.getMessage()); + } catch (Throwable t) { + logToFile(testPath, "Throwable downloading: " + t.getMessage()); + } + } + + @Test + void downloadAttemptSuccessTest() throws Exception { + logToFile(testPath, "downloadAttemptSuccessTest"); + final String orcid = "0000-0001-7291-3210"; + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String apiUrl = "https://api.orcid.org/v3.0/" + orcid + "/" + REQUEST_TYPE_RECORD; + String url = "UNKNOWN"; + DownloadsReport report = new DownloadsReport(); + String record = httpConnector.getInputSource(apiUrl, report); + logToFile(testPath, "Downloaded at first attempt record: " + record); + } + + @Test + void downloadAttemptNotFoundTest() throws Exception { + logToFile(testPath, "downloadAttemptNotFoundTest"); + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String apiUrl = "https://api.orcid.org/v3.0/NOTFOUND/" + REQUEST_TYPE_RECORD; + DownloadsReport report = new DownloadsReport(); + try { + httpConnector.getInputSource(apiUrl, report); + } catch (CollectorException ce) { + + } + report.forEach((k, v) -> { + try { + logToFile(testPath, k + " " + v); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } + + @Test + @Ignore + void testDownloadedAuthor() throws Exception { + final String base64CompressedWork = "H4sIAAAAAAAAAI2Yy26jMBSG932KiD0hIe1MiwiVZjGLkWbX2XRHsFOsgs3YJmnefszFFy4+mUhtVPz9P/gcH/vQ9PWrrjYXzAVh9Bjst7tgg2nBEKEfx+DP28/wOdgImVOUV4ziY3DDInjNHlKOC8ZRMnxtmlyWxyDaqU+ofg7h/uX7IYwfn+Ngo25ARUKoxJzm1TEopWySKLper1vGC4LU74+IikgTWoFRW+SyfyyfxCBag4iQhBawyoGMDjdqJrnECJAZRquYLDEPaV5jv8oyWlXj+qTiXZLGr7KMiQbnjAOR6IY1W7C6hgIwjGt6SKGfHsY13ajHYipLIcIyJ5Xw6+akdvjEtyt4wxEwM6+VGph5N2zYr2ENhQRhKsmZYChmS1j7nFs6VIBPOwImKhyfMVeFg6GAWEjrcoQ4FoBmBGwVXYhagGHDBIEX+ZzUDiqyn35VN6rJUpUJ4zc/PAI2T03FbrUKJZQszWjV3zavVOjvVfoE01qB+YUUQPGNwHTt3luxJjdqh1AxJFBKLWOrSeCcF13RtxxYtlPOPqH6m+MLwVfoMQ2kdae2ArLajc6fTxkI1nIoegs0yB426pMO+0fSw07xDKMu0XKSde5C2VvrlVMijRzFwqY7XTJI1QMLWcmEzMxtDdxfHiYSgTNJnYJ1K9y5k0tUrMgrnGGaRiuXxxuClulYUbr0nBvpkYLjvgTCGsuSoex3f1CEvRPHKI184NJKtKeaiO7cD5E61bJ4F+9DFd7d01u8Tw6H5BBvvz8f3q3nXLGIeJULGdaqeVBBRK7rS7h/fNvvk/gpedxt4923dxP7Fc3KtKuc1BhlkrfYmeN4dcmrhmbw60+HmWw2CKgbTuqc32CXKTTmeTWT6bDBjPsQ0DTpnchdaYO0ayQ2FyLIiVREqs25aU8VKYLRbK0BsyZuqvr1MU2Sm/rDdhe/2CRN6FU/b+oBVyj1zqRtC5F8kAumfTclsl+s7EoNQu64nfOaVLeezX60Z3XCULLi6GI2IZGTEeey7fec9lBAuXawIHKcpifE7GABHWfoxLVfpUNPBXoMbZWrHFsR3bPAk9J9i2sw9nW6AQT1mpk++7JhW+v44Hmt8PomJqfD13jRnvFOSxCKtu6qHoyBbQ7cMFo750UEfGaXm6bEeplXIXj2hvL6mA7tzvIwmM9pbJFBG834POZdLGi2gH2u9u0K9HMwn5PTioFWLufzmrS4oNuU9Pkt2rf/2jMs7fMdm2rQTTM+j+49AzToAVuXYA1mD2k0+XdE9vAP+JYR5NcQAAA="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\ndownloaded author \n\n" + work); + } + + @Test + @Ignore + void testDownloadedWork() throws Exception { + final String base64CompressedWork = "H4sIAAAAAAAAANVa63LiOBb+z1Oo+LVbhbkGAlTCLE1Id9IhTQV6unr/CVvB2tiWR5Khmal5rX2BfbE9ki3b3Jzt6Y13h6pQSPrOXTo6knL10zffQxvCBWXBdbVVb1YRCWzm0GB9Xf28vLX6VSQkDhzssYBcV3dEVH8aVa62jL8M1RcKI2kBAYwNLnrtXrMPFCGW7nW10YSPBX8dq3XRb1swNGgomkaG3FBBV9SjcnddDaOVR+0qApUCMaSBJDzA3nXVlTIcNhrb7bbOuE0d+F43AtEwCENBnMjGUhtyjiSFGBqHCkkDu5gqB0rpSMgJsCJOAVmKMVRMuoRbAfbJeaoMY6h84q8gQi4Nz1NlmNQbnDNe4Ak1bLA28/0iB8TjBg1GMV5gdzxu0CGoxSBKlkMkpp44T3eINBxeyG5bKDABpJb7QF1guRpOsd/iOWRRhwSSPlNS5LNjsOHzHAXxmjlHmwBSr3DyTDgsNVLkkAxk6LDjcCIKaBJAtoo2FCagFTJBiyf5IdJwUAv2PJUaNUgXlgnju/PgBJDFKfTYzgdXFgXLYAzVLxH2wPWvrfQ9mKEVhG+oXbD4EsD+3H1txqaxgQwBPqRFIc0w2WoSBHNbLfqIF0zbfVymIbQ52VCyLVIzBRm6VeQVRFWNHuoHDASLeJH3jqDVUQXB5yrOH0ObE5UNLQe+R+1mu2U1u1Z7sGy2hq3esN2tt5oXf79qnELv8fGwkJYPmxSswD1uA6vVXrY7w+5g2G3WuxedjNsJmj2escJx33G/ZXsU5iAs/AyRR0WcjpRXBLglc0lM1BjP59bX1qw9Hn/+dH87/dy9vBikeinKkyzVHjoqJNWIk7QuE3KU6pES6O7MwsarJh44QW1KowcWOCxAC9tlzEPsGX3YrYGQICgS0JKzENach2bEoTYNyKEQzaJyQnzSqesKSaV3IhRx92L8tLAm7GerjbZUujSwlFnIobqKkTuth+Q4ED4Vqqypp5JyfK8ah5Ji0f8AZVSGT2TZVGXfBLw/liOyqdRpJqfyXr8ldyEZrehKkm8Jr/2hc3Qb7EVk9DfMJbU98pu3k+6aETXXBebCZpt23tBaBUfSZRxdo98eYmgNfRxrh3zAnldDM/37FvZ+IiWtoQfddgiaEGBIDGCG7btA7jgBP9svAK2h90l4yYqIGop5jgMHXA4J0NB9ksR+YTX0qFtfqACO01jGjDHFPx552AW2W0P3uvGROk4NLfTvCeNS8X9MaDg1rL9Qz6PYh7En3f4ZNmKS6nUfQYFmE6PYe05IYBqPFGaq5wHlYpaoDbYqxokVK+JBerz51z+BIzc+SfSdTHVrTiSYtZzGFNOdGrr5ohsLF2+NUguqppkDoua6/S6yXwAYu44pM+/HiZ1BwEDWMqYbC5fjZ+MEBwMjb4PRLdTFYWrUwiUhJH/H+G3pMl/7fjqJhTGwSwU5lnfLsVDmxIPvmRetbJeCOsvfaxWXbXWxLVziqNky51BLW1OP2JKzgNoASSa7Gk1WAfrLI9mirzBBIUD1r/W/AgrMla7CjEMOzYBJolo30/mnxd0SzadPt5+eZtMb9O7rEN1wNINgEA8Ha+IxNMdrHLCQRR4TFRCudnmB7m6GqD0YDCqW+lQqlfnndw93iw/TJ/RwN5k+TqZDNJkAQyUvUlWvktjrdgbQEeI1EapN8Grd7MOeYJlfajSxWVOMfcIhVQXgfcFsqhcceobVA/U3GjsbDCYrjVSKSz0wHo8Xym6dArRvvjsbAfUGouFr8s5lG9o72DVVSy1saDqMqlarWW+12r2GiIXXMzuAU6AQcLLqWf3mZRf6iOlsNQdda9BudhQnvNNdPWN8XA7BgU5G2k3pLADA75XD3BSnn3y+3M90SbZWGczkxiRVmfSaJrd0V8u0yG3CeYRyht7O07Ste45weuqNmhcpLO44woEPRq1eilLN/f3ntEqGPFfzi2PmudHTO3EOEKf60LdTyUeDr7KIIzKfTfqtdr896JxklQtbES/IQD7UyL+SZIJSXYhLHkHZ9oqEjPR1MRzWu550cDYdCeI9n+S4hzouUU76+UeCQJ0fjkKn0+v3m703i0Eh/z97BCDH/XAAziTIt4rH94j7s4dHbSY/HJ90e3qriBQL+MMxCGETs9j/QxiSQ5PaS63/QsZqdS8vOxdvtj7Oc//fL4dTI2LvDAfVA6erSDKe3+cPxw70j4c5HHZlfLT9iAEZYKjZkxOYKZxymJy659l/t+QZllC5bvVJrzShD5GN0/NkiaZyqNcJh0NrdngtTfp7wviaHB+SS1Ng7O+Sk3h5HodT4S8RyY78pUmGM6eEg1l8tVCa1KnvY/SgrzDKsxRLF46j+uahNKH3BE6lsIb1lUxpUhdS3WUE+u6nPP/qiyAsklumMhMz9SBNqeus0oQ+QXqwIa7m3qy87IhXnBLPI8kVXXlZMaASm5vAEqWuKYkvHMtbPdiPiIdm6dVmeVMZjX+lfnKDWmaRAT7ev6ctTfhEF3RoWnJeXlKfSXcHcsf69rk0wTd4Qx30RV9yl5et2Ipwqe/SS5MJXiU8vbIv2b/qZaC8PZ65AUwj9QJR3vx1mQ9b7VPy1FFebnSpWq7xi0qJuwA+fLYpL7rwJdLXobcSa97kM4Cl35f3YXmofp0+8R9gBc/XeXL9Vn38pH7mLTs27z9T8ky1n7ynlZ0I4le78rYzl6t/woG5krwQlpcRcLDD2UPkH5F73C9G5tFKfY0q/wa1TIHI0CgAAA=="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\ndownloaded work \n\n" + work); + } } From b924276e18b8b5a438251b89998471c1b800eb0c Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 24 Sep 2021 17:11:56 +0200 Subject: [PATCH 253/287] tests to generate records for the EOSC-Future demo with the EOSC Jupyter Notebbok subject --- .../provision/IndexRecordTransformerTest.java | 12 ++++ .../eosc-future/data-transfer-pilot.xml | 72 +++++++++++++++++++ .../training-notebooks-seadatanet.xml | 71 ++++++++++++++++++ .../eu/dnetlib/dhp/oa/provision/fields.xml | 16 +++-- 4 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index cd07cfcb1..8daf318be 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -76,6 +76,18 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureTraining() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); + testRecordTransformation(record); + } + private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml new file mode 100644 index 000000000..23dd6c6ed --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml @@ -0,0 +1,72 @@ + + +
+ r37b0ad08687::dec0d8520e726f2adda9a51280ac7299 + 2021-09-22T08:53:16Z + under curation + +
+ + + + EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml + Giuseppe La Rocca + Enol Fernández + Andrea Manzi + + + + This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement. + + EOSC Jupyter Notebook + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:4218562 + + oai:zenodo.org:4218562 + 10.5281/zenodo.4218562 + + + false + false + 0.9 + + + + + + + + + + + + + https://zenodo.org/record/4218562 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml new file mode 100644 index 000000000..9995b902f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml @@ -0,0 +1,71 @@ + + +
+ r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb + 2021-09-22T08:53:13Z + under curation + +
+ + + + + EGI-Foundation/training-notebooks-seadatanet: Version 0.4 + Enol Fernández + + + + A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/qVk6JWQ (run at EGI Notebooks service for easy access to data).This release updates the PID for the data. + + EOSC Jupyter Notebook + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:3561323 + + oai:zenodo.org:3561323 + 10.5281/zenodo.3561323 + + + false + false + 0.9 + + + + + + + + + + + + + https://zenodo.org/record/3561323 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index c47975c9d..910a366f6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -15,7 +15,13 @@ - + + + + + + + @@ -28,7 +34,8 @@ - + + @@ -79,6 +86,7 @@ + @@ -105,7 +113,7 @@ - + @@ -130,7 +138,7 @@ - + From be79d74e3d7993e8dc160d89ccd0b182dec1b139 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 27 Sep 2021 16:57:04 +0200 Subject: [PATCH 254/287] Fixed DoiBoost generation to point to correct organization in affiliation relation --- .../doiboost/SparkGenerateDoiBoost.scala | 51 ++++++++++++++++++- .../doiboost/generate_doiboost_params.json | 3 +- .../doiboost/process/oozie_app/workflow.xml | 7 +++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index b77de13b9..e501b4823 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -13,10 +13,30 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString,JArray} +import org.json4s.jackson.JsonMethods.parse object SparkGenerateDoiBoost { + def extractIdGRID(input:String):List[(String,String)] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + + val id:String = (json \ "id").extract[String] + + val grids:List[String] = for { + + JObject(pid) <- json \ "pid" + JField("qualifier", JObject(qualifier)) <- pid + JField("classid", JString(classid)) <-qualifier + JField("value", JString(vl)) <- pid + if classid == "GRID" + } yield vl + grids.map(g => (id, s"unresolved::grid::${g.toLowerCase}"))(collection.breakOut) + } + def main(args: Array[String]): Unit = { @@ -36,6 +56,7 @@ object SparkGenerateDoiBoost { val hostedByMapPath = parser.get("hostedByMapPath") val workingDirPath = parser.get("workingPath") + val openaireOrganizationPath = parser.get("openaireOrganizationPath") val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { override def zero: Publication = new Publication @@ -156,7 +177,7 @@ object SparkGenerateDoiBoost { magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { val pub:Publication = item._1._2 val affiliation = item._2 - val affId:String = if (affiliation.GridId.isDefined) DoiBoostMappingUtil.generateGridAffiliationId(affiliation.GridId.get) else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) + val affId:String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) val r:Relation = new Relation r.setSource(pub.getId) r.setTarget(affId) @@ -174,9 +195,35 @@ object SparkGenerateDoiBoost { r1.setDataInfo(pub.getDataInfo) r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) List(r, r1) - })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") + })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + + + val unresolvedRels:Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { + + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else if (r.getTarget.startsWith("unresolved")) + (r.getTarget,r) + else + ("resolved", r) + }) + + val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) + + unresolvedRels.joinWith(openaireOrganization,unresolvedRels("_1").equalTo(openaireOrganization("_2"))) + .map { x => + val currentRels = x._1._2 + val currentOrgs = x._2 + if (currentOrgs!= null) + if(currentRels.getSource.startsWith("unresolved")) + currentRels.setSource(currentOrgs._1) + else + currentRels.setTarget(currentOrgs._1) + currentRels + }.write.save(s"$workingDirPath/doiBoostPublicationAffiliation") + magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { val affiliation = item._2 if (affiliation.GridId.isEmpty) { diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json index 1ff63dd0e..39455fb67 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json @@ -1,7 +1,8 @@ [ {"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}, {"paramName": "hb", "paramLongName":"hostedByMapPath", "paramDescription": "the hosted By Map Path", "paramRequired": true}, + {"paramName": "oo", "paramLongName":"openaireOrganizationPath", "paramDescription": "the openaire Organization Path", "paramRequired": true}, {"paramName": "ap", "paramLongName":"affiliationPath", "paramDescription": "the Affliation Path", "paramRequired": true}, {"paramName": "pa", "paramLongName":"paperAffiliationPath", "paramDescription": "the paperAffiliation Path", "paramRequired": true}, - {"paramName": "w", "paramLongName":"workingPath", "paramDescription": "the Working Path", "paramRequired": true} + {"paramName": "w", "paramLongName":"workingPath", "paramDescription": "the Working Path", "paramRequired": true} ] diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f5596b60e..eb82c3a7d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -27,6 +27,12 @@ hostedByMapPath the hostedByMap Path
+ + openaireOrganizationPath + the OpenAire Organizations Path + + + outputPath the Path of the sequence file action set @@ -214,6 +220,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --hostedByMapPath${hostedByMapPath} + --openaireOrganizationPath${openaireOrganizationPath} --affiliationPath${inputPathMAG}/dataset/Affiliations --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations --workingPath${workingPath} From 66702b1973269c77b9780839d80375ad3e9843d9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 28 Sep 2021 08:58:59 +0200 Subject: [PATCH 255/287] Added node to update datacite --- .../SparkDownloadUpdateDatacite.scala | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala new file mode 100644 index 000000000..0b459fcf6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.actionmanager.datacite + + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.LocalFileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.apache.spark.sql.functions.max +import org.slf4j.{Logger, LoggerFactory} + +import java.text.SimpleDateFormat +import java.util.{Date, Locale} +import scala.io.Source + +object SparkDownloadUpdateDatacite { + val log: Logger = LoggerFactory.getLogger(getClass) + + def main(args: Array[String]): Unit = { + + val conf = new SparkConf + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString) + parser.parseArgument(args) + val master = parser.get("master") + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + + val hdfsuri = parser.get("namenode") + log.info(s"namenode is $hdfsuri") + + + val spark: SparkSession = SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result] + + import spark.implicits._ + + + val maxDate:String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val string_to_date =ISO8601FORMAT.parse(maxDate) + val ts = string_to_date.getTime + + + } + +} From 60a6a9a58381a41d16fc30bf635d2bb4eb8868fc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 30 Sep 2021 09:27:26 +0200 Subject: [PATCH 256/287] [graph2hive] added field 'measures' to the result view --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index ea483a4a7..46e0eb5e1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o; From c4ccd7b32c3ee272188bad034b6f0264c074bd88 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 1 Oct 2021 12:59:47 +0200 Subject: [PATCH 257/287] - --- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 196 +-- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 1266 +++++++++-------- .../eu/dnetlib/dhp/PropagationConstant.java | 11 +- .../SparkOrcidToResultFromSemRelJob.java | 7 +- ...kResultToCommunityFromOrganizationJob.java | 4 +- ...parkResultToCommunityThroughSemRelJob.java | 4 +- .../dhp/oa/graph/dump/ResultMapper.java | 84 +- .../dhp/oa/graph/dump/SaveCommunityMap.java | 10 +- .../community/SparkPrepareResultProject.java | 10 +- .../dump/complete/DumpGraphEntities.java | 25 +- .../dump/complete/QueryInformationSystem.java | 6 +- .../dump/complete/SparkDumpRelationJob.java | 2 +- .../funderresults/SparkDumpFunderResults.java | 53 +- .../SparkResultLinkedToProject.java | 1 - .../dhp/oa/graph/dump/DumpJobTest.java | 380 +++-- .../dump/PrepareResultProjectJobTest.java | 152 +- .../oa/graph/dump/UpdateProjectInfoTest.java | 83 +- .../graph/dump/complete/DumpRelationTest.java | 80 +- .../complete/QueryInformationSystemTest.java | 8 +- .../dump/funderresult/SplitPerFunderTest.java | 4 +- .../provision/IndexRecordTransformerTest.java | 3 +- pom.xml | 2 +- 22 files changed, 1273 insertions(+), 1118 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index fa7d33570..48ba8a6f6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; @@ -35,117 +36,124 @@ import scala.Tuple3; public class SparkWhitelistSimRels extends AbstractSparkAction { - private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); + private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); - private static final String WHITELIST_SEPARATOR = "####"; + private static final String WHITELIST_SEPARATOR = "####"; - public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { - super(parser, spark); - } + public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { + super(parser, spark); + } - public static void main(String[] args) throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); - parser.parseArgument(args); + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + parser.parseArgument(args); - SparkConf conf = new SparkConf(); - new SparkWhitelistSimRels(parser, getSparkSession(conf)) - .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); - } + SparkConf conf = new SparkConf(); + new SparkWhitelistSimRels(parser, getSparkSession(conf)) + .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); + } - @Override - public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException, SAXException { + @Override + public void run(ISLookUpService isLookUpService) + throws DocumentException, IOException, ISLookUpException, SAXException { - // read oozie parameters - final String graphBasePath = parser.get("graphBasePath"); - final String isLookUpUrl = parser.get("isLookUpUrl"); - final String actionSetId = parser.get("actionSetId"); - final String workingPath = parser.get("workingPath"); - final int numPartitions = Optional - .ofNullable(parser.get("numPartitions")) - .map(Integer::valueOf) - .orElse(NUM_PARTITIONS); - final String whiteListPath = parser.get("whiteListPath"); + // read oozie parameters + final String graphBasePath = parser.get("graphBasePath"); + final String isLookUpUrl = parser.get("isLookUpUrl"); + final String actionSetId = parser.get("actionSetId"); + final String workingPath = parser.get("workingPath"); + final int numPartitions = Optional + .ofNullable(parser.get("numPartitions")) + .map(Integer::valueOf) + .orElse(NUM_PARTITIONS); + final String whiteListPath = parser.get("whiteListPath"); - log.info("numPartitions: '{}'", numPartitions); - log.info("graphBasePath: '{}'", graphBasePath); - log.info("isLookUpUrl: '{}'", isLookUpUrl); - log.info("actionSetId: '{}'", actionSetId); - log.info("workingPath: '{}'", workingPath); - log.info("whiteListPath: '{}'", whiteListPath); + log.info("numPartitions: '{}'", numPartitions); + log.info("graphBasePath: '{}'", graphBasePath); + log.info("isLookUpUrl: '{}'", isLookUpUrl); + log.info("actionSetId: '{}'", actionSetId); + log.info("workingPath: '{}'", workingPath); + log.info("whiteListPath: '{}'", whiteListPath); - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - //file format: source####target - Dataset> whiteListRels = spark.createDataset(sc - .textFile(whiteListPath) - //check if the line is in the correct format: id1####id2 - .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) - .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + // file format: source####target + Dataset> whiteListRels = spark + .createDataset( + sc + .textFile(whiteListPath) + // check if the line is in the correct format: id1####id2 + .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) + .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - // for each dedup configuration - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { + // for each dedup configuration + for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { - final String entity = dedupConf.getWf().getEntityType(); - final String subEntity = dedupConf.getWf().getSubEntityValue(); - log.info("Adding whitelist simrels for: '{}'", subEntity); + final String entity = dedupConf.getWf().getEntityType(); + final String subEntity = dedupConf.getWf().getSubEntityValue(); + log.info("Adding whitelist simrels for: '{}'", subEntity); - final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); + final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); - Dataset> entities = spark.createDataset(sc - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .repartition(numPartitions) - .mapToPair( - (PairFunction) s -> { - MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); - return new Tuple2<>(d.getIdentifier(), "present"); - }) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> entities = spark + .createDataset( + sc + .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .repartition(numPartitions) + .mapToPair( + (PairFunction) s -> { + MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); + return new Tuple2<>(d.getIdentifier(), "present"); + }) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset> whiteListRels1 = whiteListRels - .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") - .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> whiteListRels1 = whiteListRels + .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset> whiteListRels2 = whiteListRels1 - .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") - .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> whiteListRels2 = whiteListRels1 + .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset whiteListSimRels = whiteListRels2 - .map((MapFunction, Relation>) - r -> createSimRel(r._1(), r._2(), entity), - Encoders.bean(Relation.class) - ); + Dataset whiteListSimRels = whiteListRels2 + .map( + (MapFunction, Relation>) r -> createSimRel(r._1(), r._2(), entity), + Encoders.bean(Relation.class)); - saveParquet(whiteListSimRels, outputPath, SaveMode.Append); - } - } + saveParquet(whiteListSimRels, outputPath, SaveMode.Append); + } + } - private Relation createSimRel(String source, String target, String entity) { - final Relation r = new Relation(); - r.setSource(source); - r.setTarget(target); - r.setSubRelType("dedupSimilarity"); - r.setRelClass("isSimilarTo"); - r.setDataInfo(new DataInfo()); + private Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass("isSimilarTo"); + r.setDataInfo(new DataInfo()); - switch (entity) { - case "result": - r.setRelType("resultResult"); - break; - case "organization": - r.setRelType("organizationOrganization"); - break; - default: - throw new IllegalArgumentException("unmanaged entity type: " + entity); - } - return r; - } + switch (entity) { + case "result": + r.setRelType("resultResult"); + break; + case "organization": + r.setRelType("organizationOrganization"); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index fa03f93a6..549988767 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -48,634 +48,640 @@ import scala.Tuple2; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class SparkDedupTest implements Serializable { - @Mock(serializable = true) - ISLookUpService isLookUpService; - - private static SparkSession spark; - private static JavaSparkContext jsc; - - private static String testGraphBasePath; - private static String testOutputBasePath; - private static String testDedupGraphBasePath; - private static final String testActionSetId = "test-orchestrator"; - private static String whitelistPath; - private static List whiteList; - - private static String WHITELIST_SEPARATOR = "####"; - - @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { - - testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) - .toFile() - .getAbsolutePath(); - testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - whitelistPath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) - .toFile() - .getAbsolutePath(); - whiteList = IOUtils.readLines(new FileReader(whitelistPath)); - - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - - final SparkConf conf = new SparkConf(); - conf.set("spark.sql.shuffle.partitions", "200"); - spark = SparkSession - .builder() - .appName(SparkDedupTest.class.getSimpleName()) - .master("local[*]") - .config(conf) - .getOrCreate(); - - jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - } - - @BeforeEach - public void setUp() throws IOException, ISLookUpException { - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); - } - - @Test - @Order(1) - void createSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50" - }); - - new SparkCreateSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(336, sw_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - } - - @Test - @Order(2) - void whitelistSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkWhitelistSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50", - "-wl", whitelistPath - }); - - new SparkWhitelistSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - //entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - - //entities simrels to be different from the number of previous step (new simrels in the whitelist) - Dataset sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); - - //check if the first relation in the whitelist exists - assertTrue(sw_simrel - .as(Encoders.bean(Relation.class)) - .toJavaRDD() - .filter(rel -> - rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])).count() > 0); - //check if the second relation in the whitelist exists - assertTrue(sw_simrel - .as(Encoders.bean(Relation.class)) - .toJavaRDD() - .filter(rel -> - rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])).count() > 0); - - assertEquals(338, sw_simrel.count()); - - } - - @Test - @Order(3) - void cutMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath, - "-cc", - "3" - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - assertEquals(0, orgs_mergerel); - assertEquals(0, pubs_mergerel); - assertEquals(0, sw_mergerel); - assertEquals(0, ds_mergerel); - assertEquals(0, orp_mergerel); - - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); - FileUtils - .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); - } - - @Test - @Order(4) - void createMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .count(); - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .count(); - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .count(); - - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); - - } - - @Test - @Order(5) - void createDedupRecordTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateDedupRecord.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateDedupRecord(parser, spark).run(isLookUpService); - - long orgs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") - .count(); - long pubs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .count(); - long sw_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") - .count(); - long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); - long orp_deduprecord = jsc - .textFile( - testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") - .count(); - - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); - assertEquals(49, sw_deduprecord); - assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); - } - - @Test - @Order(6) - void updateEntityTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkUpdateEntity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkUpdateEntity(parser, spark).run(isLookUpService); - - long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); - long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); - long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); - long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); - long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); - long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); - long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); - - long mergedOrgs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedPubs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedSw = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedDs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedOrp = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - assertEquals(896, publications); - assertEquals(838, organizations); - assertEquals(100, projects); - assertEquals(100, datasource); - assertEquals(198, softwares); - assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); - - long deletedOrgs = jsc - .textFile(testDedupGraphBasePath + "/organization") - .filter(this::isDeletedByInference) - .count(); - - long deletedPubs = jsc - .textFile(testDedupGraphBasePath + "/publication") - .filter(this::isDeletedByInference) - .count(); - - long deletedSw = jsc - .textFile(testDedupGraphBasePath + "/software") - .filter(this::isDeletedByInference) - .count(); - - long deletedDs = jsc - .textFile(testDedupGraphBasePath + "/dataset") - .filter(this::isDeletedByInference) - .count(); - - long deletedOrp = jsc - .textFile(testDedupGraphBasePath + "/otherresearchproduct") - .filter(this::isDeletedByInference) - .count(); - - assertEquals(mergedOrgs, deletedOrgs); - assertEquals(mergedPubs, deletedPubs); - assertEquals(mergedSw, deletedSw); - assertEquals(mergedDs, deletedDs); - assertEquals(mergedOrp, deletedOrp); - } - - @Test - @Order(7) - void propagateRelationTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkPropagateRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkPropagateRelation(parser, spark).run(isLookUpService); - - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - - assertEquals(4860, relations); - - // check deletedbyinference - final Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) - .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels - .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); - - JavaRDD toCheck = jsc - .textFile(testDedupGraphBasePath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); - - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); - - assertEquals(updated, deletedbyinference); - } - - @Test - @Order(8) - void testRelations() throws Exception { - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); - } - - private void testUniqueness(String path, int expected_total, int expected_unique) { - Dataset rel = spark - .read() - .textFile(getClass().getResource(path).getPath()) - .map( - (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), - Encoders.bean(Relation.class)); - - assertEquals(expected_total, rel.count()); - assertEquals(expected_unique, rel.distinct().count()); - } - - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - return s.contains("\"deletedbyinference\":true"); - } + @Mock(serializable = true) + ISLookUpService isLookUpService; + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; + private static String whitelistPath; + private static List whiteList; + + private static String WHITELIST_SEPARATOR = "####"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + whitelistPath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) + .toFile() + .getAbsolutePath(); + whiteList = IOUtils.readLines(new FileReader(whitelistPath)); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "200"); + spark = SparkSession + .builder() + .appName(SparkDedupTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); + } + + @Test + @Order(1) + void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(336, sw_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + } + + @Test + @Order(2) + void whitelistSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkWhitelistSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50", + "-wl", whitelistPath + }); + + new SparkWhitelistSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + + // entities simrels to be different from the number of previous step (new simrels in the whitelist) + Dataset sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); + + // check if the first relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + // check if the second relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + + assertEquals(338, sw_simrel.count()); + + } + + @Test + @Order(3) + void cutMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-cc", + "3" + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + assertEquals(0, orgs_mergerel); + assertEquals(0, pubs_mergerel); + assertEquals(0, sw_mergerel); + assertEquals(0, ds_mergerel); + assertEquals(0, orp_mergerel); + + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); + FileUtils + .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); + } + + @Test + @Order(4) + void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + assertEquals(1272, orgs_mergerel); + assertEquals(1438, pubs_mergerel); + assertEquals(286, sw_mergerel); + assertEquals(472, ds_mergerel); + assertEquals(718, orp_mergerel); + + } + + @Test + @Order(5) + void createDedupRecordTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateDedupRecord.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateDedupRecord(parser, spark).run(isLookUpService); + + long orgs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") + .count(); + long pubs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .count(); + long sw_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") + .count(); + long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); + long orp_deduprecord = jsc + .textFile( + testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") + .count(); + + assertEquals(85, orgs_deduprecord); + assertEquals(65, pubs_deduprecord); + assertEquals(49, sw_deduprecord); + assertEquals(97, ds_deduprecord); + assertEquals(89, orp_deduprecord); + } + + @Test + @Order(6) + void updateEntityTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateEntity.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkUpdateEntity(parser, spark).run(isLookUpService); + + long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); + long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); + long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); + long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); + long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); + long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); + + long mergedOrgs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedPubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedSw = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedDs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedOrp = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + assertEquals(896, publications); + assertEquals(838, organizations); + assertEquals(100, projects); + assertEquals(100, datasource); + assertEquals(198, softwares); + assertEquals(389, dataset); + assertEquals(517, otherresearchproduct); + + long deletedOrgs = jsc + .textFile(testDedupGraphBasePath + "/organization") + .filter(this::isDeletedByInference) + .count(); + + long deletedPubs = jsc + .textFile(testDedupGraphBasePath + "/publication") + .filter(this::isDeletedByInference) + .count(); + + long deletedSw = jsc + .textFile(testDedupGraphBasePath + "/software") + .filter(this::isDeletedByInference) + .count(); + + long deletedDs = jsc + .textFile(testDedupGraphBasePath + "/dataset") + .filter(this::isDeletedByInference) + .count(); + + long deletedOrp = jsc + .textFile(testDedupGraphBasePath + "/otherresearchproduct") + .filter(this::isDeletedByInference) + .count(); + + assertEquals(mergedOrgs, deletedOrgs); + assertEquals(mergedPubs, deletedPubs); + assertEquals(mergedSw, deletedSw); + assertEquals(mergedDs, deletedDs); + assertEquals(mergedOrp, deletedOrp); + } + + @Test + @Order(7) + void propagateRelationTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkPropagateRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkPropagateRelation(parser, spark).run(isLookUpService); + + long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + + assertEquals(4860, relations); + + // check deletedbyinference + final Dataset mergeRels = spark + .read() + .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) + .as(Encoders.bean(Relation.class)); + final JavaPairRDD mergedIds = mergeRels + .where("relClass == 'merges'") + .select(mergeRels.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2(r.getString(0), "d")); + + JavaRDD toCheck = jsc + .textFile(testDedupGraphBasePath + "/relation") + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()) + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()); + + long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); + long updated = toCheck.count(); + + assertEquals(updated, deletedbyinference); + } + + @Test + @Order(8) + void testRelations() throws Exception { + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); + } + + private void testUniqueness(String path, int expected_total, int expected_unique) { + Dataset rel = spark + .read() + .textFile(getClass().getResource(path).getPath()) + .map( + (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), + Encoders.bean(Relation.class)); + + assertEquals(expected_total, rel.count()); + assertEquals(expected_unique, rel.distinct().count()); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + + public boolean isDeletedByInference(String s) { + return s.contains("\"deletedbyinference\":true"); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0d7c74475..23e97a97a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -69,7 +69,7 @@ public class PropagationConstant { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } @@ -84,7 +84,8 @@ public class PropagationConstant { return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, + String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); @@ -108,7 +109,11 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); + r + .setDataInfo( + getDataInfo( + inference_provenance, inference_class_id, inference_class_name, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 68949b900..a38b4da2e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,14 +173,17 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); + p + .setQualifier( + getQualifier( + ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 1289ff644..50df08f8c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -22,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -130,7 +130,7 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 7f76ead94..f31a26230 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -20,6 +19,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; @@ -126,7 +126,7 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index f4bf8d52a..988a8c7a4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -6,6 +6,8 @@ import java.util.*; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.*; @@ -23,8 +25,6 @@ import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Context; import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; import eu.dnetlib.dhp.schema.oaf.*; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; public class ResultMapper implements Serializable { @@ -278,16 +278,17 @@ public class ResultMapper implements Serializable { } - Optional .ofNullable(input.getPid()) .ifPresent( - value -> out.setPid(value - .stream() - .map( - p -> - ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList()))); + value -> out + .setPid( + value + .stream() + .map( + p -> ControlledField + .newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); oStr = Optional.ofNullable(input.getDateofacceptance()); if (oStr.isPresent()) { @@ -298,11 +299,10 @@ public class ResultMapper implements Serializable { out.setPublisher(oStr.get().getValue()); } - Optional .ofNullable(input.getSource()) - .ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList()) )); - // value.stream().forEach(s -> sourceList.add(s.getValue()))); + .ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList()))); + // value.stream().forEach(s -> sourceList.add(s.getValue()))); // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); List subjectList = new ArrayList<>(); Optional @@ -577,48 +577,60 @@ public class ResultMapper implements Serializable { Optional di = Optional.ofNullable(pid.getDataInfo()); if (di.isPresent()) { return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()), - Provenance - .newInstance( - di.get().getProvenanceaction().getClassname(), - di.get().getTrust())); + .newInstance( + ControlledField + .newInstance( + pid.getQualifier().getClassid(), + pid.getValue()), + Provenance + .newInstance( + di.get().getProvenanceaction().getClassname(), + di.get().getTrust())); } else { return Pid - .newInstance( - ControlledField - .newInstance( - pid.getQualifier().getClassid(), - pid.getValue()) + .newInstance( + ControlledField + .newInstance( + pid.getQualifier().getClassid(), + pid.getValue()) - ); + ); } } private static Pid getOrcid(List p) { List pid_list = p.stream().map(pid -> { if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || - (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))){ + (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) { return pid; } - return null; + return null; }).filter(pid -> pid != null).collect(Collectors.toList()); - if(pid_list.size() == 1){ + if (pid_list.size() == 1) { return getAuthorPid(pid_list.get(0)); } - List orcid = pid_list.stream().filter(ap -> ap.getQualifier().getClassid() - .equals(ModelConstants.ORCID)).collect(Collectors.toList()); - if(orcid.size() == 1){ + List orcid = pid_list + .stream() + .filter( + ap -> ap + .getQualifier() + .getClassid() + .equals(ModelConstants.ORCID)) + .collect(Collectors.toList()); + if (orcid.size() == 1) { return getAuthorPid(orcid.get(0)); } - orcid = pid_list.stream().filter(ap -> ap.getQualifier().getClassid() - .equals(ModelConstants.ORCID_PENDING)).collect(Collectors.toList()); - if(orcid.size() == 1){ + orcid = pid_list + .stream() + .filter( + ap -> ap + .getQualifier() + .getClassid() + .equals(ModelConstants.ORCID_PENDING)) + .collect(Collectors.toList()); + if (orcid.size() == 1) { return getAuthorPid(orcid.get(0)); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6cdd741ea..971324744 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -86,10 +86,10 @@ public class SaveCommunityMap implements Serializable { private void saveCommunityMap(boolean singleCommunity, String community_id) throws ISLookUpException, IOException, DocumentException, SAXException { - writer - .write( - Utils.OBJECT_MAPPER - .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); - } + writer + .write( + Utils.OBJECT_MAPPER + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index edd6d9b23..3508e4126 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -8,8 +8,6 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.community.Validated; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -28,9 +26,11 @@ import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; +import eu.dnetlib.dhp.schema.dump.oaf.community.Validated; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -80,7 +80,9 @@ public class SparkPrepareResultProject implements Serializable { private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) { Dataset relation = Utils .readPath(spark, inputPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); + .filter( + "dataInfo.deletedbyinference = false and lower(relClass) = '" + + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); Dataset projects = Utils .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); @@ -159,7 +161,7 @@ public class SparkPrepareResultProject implements Serializable { provenance.setTrust(di.get().getTrust()); p.setProvenance(provenance); } - if (relation.getValidated()){ + if (relation.getValidated()) { p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate())); } return p; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 7441baaef..7f64db41c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -9,7 +9,6 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.ForeachFunction; @@ -23,6 +22,8 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -453,18 +454,20 @@ public class DumpGraphEntities implements Serializable { private static void organizationMap(SparkSession spark, String inputPath, String outputPath, Class inputClazz) { - Utils.readPath(spark, inputPath, inputClazz) - .map( - (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), - Encoders.bean(Organization.class)) - .filter((FilterFunction) o -> o!= null) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + Utils + .readPath(spark, inputPath, inputClazz) + .map( + (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), + Encoders.bean(Organization.class)) + .filter((FilterFunction) o -> o != null) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); } - private static eu.dnetlib.dhp.schema.dump.oaf.graph.Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) { + private static eu.dnetlib.dhp.schema.dump.oaf.graph.Organization mapOrganization( + eu.dnetlib.dhp.schema.oaf.Organization org) { if (org.getDataInfo().getDeletedbyinference()) return null; Organization organization = new Organization(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index be1f21171..88c92da53 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -5,8 +5,6 @@ import java.io.StringReader; import java.util.*; import java.util.function.Consumer; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; @@ -15,6 +13,8 @@ import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; import org.xml.sax.SAXException; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -140,7 +140,7 @@ public class QueryInformationSystem { } private String makeOpenaireId(Node el, String prefix) { - if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))){ + if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) { return null; } String funder = null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index 7370b43f8..ab7e7e595 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -107,7 +107,7 @@ public class SparkDumpRelationJob implements Serializable { } } } - if(relation.getValidated()){ + if (relation.getValidated()) { rel_new.setValidated(relation.getValidated()); rel_new.setValidationDate(relation.getValidationDate()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index d520a9c6a..9cddba3c5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -41,9 +41,9 @@ public class SparkDumpFunderResults implements Serializable { parser.parseArgument(args); Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String inputPath = parser.get("sourcePath"); @@ -58,32 +58,31 @@ public class SparkDumpFunderResults implements Serializable { SparkConf conf = new SparkConf(); runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - writeResultProjectList(spark, inputPath, outputPath, graphPath); - }); + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + writeResultProjectList(spark, inputPath, outputPath, graphPath); + }); } private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, - String graphPath) { + String graphPath) { Dataset project = Utils - .readPath(spark, graphPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); + .readPath(spark, graphPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); Dataset result = Utils - .readPath(spark, inputPath + "/publication", CommunityResult.class) - .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); - + .readPath(spark, inputPath + "/publication", CommunityResult.class) + .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); List funderList = project - .select("id") - .map((MapFunction) value -> value.getString(0).substring(0, 15), Encoders.STRING()) - .distinct() - .collectAsList(); + .select("id") + .map((MapFunction) value -> value.getString(0).substring(0, 15), Encoders.STRING()) + .distinct() + .collectAsList(); funderList.forEach(funder -> { String fundernsp = funder.substring(3); @@ -104,7 +103,7 @@ public class SparkDumpFunderResults implements Serializable { } private static void dumpResults(String nsp, Dataset results, String outputPath, - String funderName) { + String funderName) { results.map((MapFunction) r -> { if (!Optional.ofNullable(r.getProjects()).isPresent()) { @@ -123,15 +122,15 @@ public class SparkDumpFunderResults implements Serializable { } return null; }, Encoders.bean(CommunityResult.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/" + funderName); + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + funderName); } private static void writeFunderResult(String funder, Dataset results, String outputPath, - String funderDump) { + String funderDump) { if (funder.startsWith("40|irb")) { dumpResults(funder, results, outputPath, "HRZZ"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index e17aa285c..156ae6d63 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -80,7 +80,6 @@ public class SparkResultLinkedToProject implements Serializable { private static void writeResultsLinkedToProjects(SparkSession spark, Class inputClazz, String inputPath, String outputPath, String graphPath) { - Dataset results = Utils .readPath(spark, inputPath, inputClazz) .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 3a4f05fb5..bf6301ec4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -7,17 +7,10 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.List; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.sun.xml.internal.ws.policy.AssertionSet; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import org.apache.commons.io.FileUtils; -import org.apache.neethi.Assertion; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -25,10 +18,14 @@ import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.Instance; +import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -145,70 +142,121 @@ public class DumpJobTest { } @Test - public void testPublicationDump(){ + public void testPublicationDump() { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") + .getPath(); final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); DumpProducts dump = new DumpProducts(); dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + .run( + // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(1, verificationDataset.count()); GraphResult gr = verificationDataset.first(); - Assertions.assertEquals(2, gr.getMeasures().size()); - Assertions.assertTrue(gr.getMeasures().stream().anyMatch(m -> m.getKey().equals("influence") - && m.getValue().equals("1.62759106106e-08"))); - Assertions.assertTrue(gr.getMeasures().stream().anyMatch(m -> m.getKey().equals("popularity") - && m.getValue().equals("0.22519296"))); + Assertions + .assertTrue( + gr + .getMeasures() + .stream() + .anyMatch( + m -> m.getKey().equals("influence") + && m.getValue().equals("1.62759106106e-08"))); + Assertions + .assertTrue( + gr + .getMeasures() + .stream() + .anyMatch( + m -> m.getKey().equals("popularity") + && m.getValue().equals("0.22519296"))); Assertions.assertEquals(6, gr.getAuthor().size()); - Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Nikolaidou,Charitini") && - a.getName().equals("Charitini") && a.getSurname().equals("Nikolaidou") - && a.getRank() == 1 && a.getPid() == null)); + Assertions + .assertTrue( + gr + .getAuthor() + .stream() + .anyMatch( + a -> a.getFullname().equals("Nikolaidou,Charitini") && + a.getName().equals("Charitini") && a.getSurname().equals("Nikolaidou") + && a.getRank() == 1 && a.getPid() == null)); - Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Votsi,Nefta") && - a.getName().equals("Nefta") && a.getSurname().equals("Votsi") - && a.getRank() == 2 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + Assertions + .assertTrue( + gr + .getAuthor() + .stream() + .anyMatch( + a -> a.getFullname().equals("Votsi,Nefta") && + a.getName().equals("Nefta") && a.getSurname().equals("Votsi") + && a.getRank() == 2 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") + && a.getPid().getProvenance() != null)); - Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Sgardelis,Steanos") && - a.getName().equals("Steanos") && a.getSurname().equals("Sgardelis") - && a.getRank() == 3 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + Assertions + .assertTrue( + gr + .getAuthor() + .stream() + .anyMatch( + a -> a.getFullname().equals("Sgardelis,Steanos") && + a.getName().equals("Steanos") && a.getSurname().equals("Sgardelis") + && a.getRank() == 3 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") + && a.getPid().getProvenance() != null)); - Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Halley,John") && - a.getName().equals("John") && a.getSurname().equals("Halley") - && a.getRank() == 4 && a.getPid() == null)); + Assertions + .assertTrue( + gr + .getAuthor() + .stream() + .anyMatch( + a -> a.getFullname().equals("Halley,John") && + a.getName().equals("John") && a.getSurname().equals("Halley") + && a.getRank() == 4 && a.getPid() == null)); - Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Pantis,John") && - a.getName().equals("John") && a.getSurname().equals("Pantis") - && a.getRank() == 5 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + Assertions + .assertTrue( + gr + .getAuthor() + .stream() + .anyMatch( + a -> a.getFullname().equals("Pantis,John") && + a.getName().equals("John") && a.getSurname().equals("Pantis") + && a.getRank() == 5 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") + && a.getPid().getProvenance() != null)); - Assertions.assertTrue(gr.getAuthor().stream().anyMatch(a -> a.getFullname().equals("Tsiafouli,Maria") && - a.getName().equals("Maria") && a.getSurname().equals("Tsiafouli") - && a.getRank() == 6 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") && a.getPid().getProvenance() != null)); + Assertions + .assertTrue( + gr + .getAuthor() + .stream() + .anyMatch( + a -> a.getFullname().equals("Tsiafouli,Maria") && + a.getName().equals("Maria") && a.getSurname().equals("Tsiafouli") + && a.getRank() == 6 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) + && a.getPid().getId().getValue().equals("0000-0001-6651-1178") + && a.getPid().getProvenance() != null)); Assertions.assertEquals("publication", gr.getType()); @@ -216,27 +264,52 @@ public class DumpJobTest { Assertions.assertEquals("English", gr.getLanguage().getLabel()); Assertions.assertEquals(1, gr.getCountry().size()); - Assertions.assertEquals("IT" , gr.getCountry().get(0).getCode()); - Assertions.assertEquals("Italy" , gr.getCountry().get(0).getLabel()); - Assertions.assertTrue( gr.getCountry().get(0).getProvenance() == null); + Assertions.assertEquals("IT", gr.getCountry().get(0).getCode()); + Assertions.assertEquals("Italy", gr.getCountry().get(0).getLabel()); + Assertions.assertTrue(gr.getCountry().get(0).getProvenance() == null); Assertions.assertEquals(12, gr.getSubjects().size()); - Assertions.assertTrue(gr.getSubjects().stream().anyMatch(s -> s.getSubject().getValue().equals("Ecosystem Services hotspots") - && s.getSubject().getScheme().equals("ACM") && s.getProvenance() != null && - s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); - Assertions.assertTrue(gr.getSubjects().stream().anyMatch(s -> s.getSubject().getValue().equals("Natura 2000") - && s.getSubject().getScheme().equals("") && s.getProvenance() != null && - s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); + Assertions + .assertTrue( + gr + .getSubjects() + .stream() + .anyMatch( + s -> s.getSubject().getValue().equals("Ecosystem Services hotspots") + && s.getSubject().getScheme().equals("ACM") && s.getProvenance() != null && + s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); + Assertions + .assertTrue( + gr + .getSubjects() + .stream() + .anyMatch( + s -> s.getSubject().getValue().equals("Natura 2000") + && s.getSubject().getScheme().equals("") && s.getProvenance() != null && + s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); - Assertions.assertEquals("Ecosystem Service capacity is higher in areas of multiple designation types", + Assertions + .assertEquals( + "Ecosystem Service capacity is higher in areas of multiple designation types", gr.getMaintitle()); Assertions.assertEquals(null, gr.getSubtitle()); Assertions.assertEquals(1, gr.getDescription().size()); - Assertions.assertTrue(gr.getDescription().get(0).startsWith("The implementation of the Ecosystem Service (ES) concept into practice")); - Assertions.assertTrue(gr.getDescription().get(0).endsWith("start complying with new standards and demands for nature conservation and environmental management.")); + Assertions + .assertTrue( + gr + .getDescription() + .get(0) + .startsWith("The implementation of the Ecosystem Service (ES) concept into practice")); + Assertions + .assertTrue( + gr + .getDescription() + .get(0) + .endsWith( + "start complying with new standards and demands for nature conservation and environmental management.")); Assertions.assertEquals("2017-01-01", gr.getPublicationdate()); @@ -255,7 +328,9 @@ public class DumpJobTest { Assertions.assertEquals(0, gr.getCoverage().size()); Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel()); - Assertions.assertEquals(Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); + Assertions + .assertEquals( + Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); Assertions.assertEquals(null, gr.getBestaccessright().getOpenAccessRoute()); Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); @@ -284,12 +359,16 @@ public class DumpJobTest { Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId()); Assertions.assertEquals(2, gr.getOriginalId().size()); - Assertions.assertTrue(gr.getOriginalId().contains("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2") - && gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); + Assertions + .assertTrue( + gr.getOriginalId().contains("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2") + && gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); Assertions.assertEquals(1, gr.getPid().size()); - Assertions.assertTrue(gr.getPid().get(0).getScheme().equals("doi") - && gr.getPid().get(0).getValue().equals("10.1016/j.triboint.2014.05.004")); + Assertions + .assertTrue( + gr.getPid().get(0).getScheme().equals("doi") + && gr.getPid().get(0).getValue().equals("10.1016/j.triboint.2014.05.004")); Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateofcollection()); @@ -298,53 +377,63 @@ public class DumpJobTest { Instance instance = gr.getInstance().get(0); Assertions.assertEquals(0, instance.getPid().size()); Assertions.assertEquals(1, instance.getAlternateIdentifier().size()); - Assertions.assertTrue(instance.getAlternateIdentifier().get(0).getScheme().equals("doi") - && instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718")); + Assertions + .assertTrue( + instance.getAlternateIdentifier().get(0).getScheme().equals("doi") + && instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718")); Assertions.assertEquals(null, instance.getLicense()); - Assertions.assertTrue(instance.getAccessright().getCode().equals(Constants.accessRightsCoarMap - .get(ModelConstants.ACCESS_RIGHT_OPEN))); + Assertions + .assertTrue( + instance + .getAccessright() + .getCode() + .equals( + Constants.accessRightsCoarMap + .get(ModelConstants.ACCESS_RIGHT_OPEN))); Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN)); Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green)); Assertions.assertTrue(instance.getType().equals("Article")); Assertions.assertEquals(2, instance.getUrl().size()); - Assertions.assertTrue(instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718") - && instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/")); - Assertions.assertEquals("2017-01-01",instance.getPublicationdate()); - Assertions.assertEquals(null,instance.getArticleprocessingcharge()); + Assertions + .assertTrue( + instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718") + && instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/")); + Assertions.assertEquals("2017-01-01", instance.getPublicationdate()); + Assertions.assertEquals(null, instance.getArticleprocessingcharge()); Assertions.assertEquals("peerReviewed", instance.getRefereed()); } - @Test - public void testDatasetDump(){ + public void testDatasetDump() { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance") + .getPath(); final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); DumpProducts dump = new DumpProducts(); dump - .run(false, sourcePath, workingDir.toString() + "/result", - communityMapPath, Dataset.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + .run( + false, sourcePath, workingDir.toString() + "/result", + communityMapPath, Dataset.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(1, verificationDataset.count()); Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count()); - //the common fields in the result have been already checked. Now checking only + // the common fields in the result have been already checked. Now checking only // community specific fields GraphResult gr = verificationDataset.first(); @@ -353,10 +442,33 @@ public class DumpJobTest { Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count()); Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count()); Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count()); - Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada")).count()); - Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count()); - Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals("")).count()); - Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals("")).count()); + Assertions + .assertEquals( + 1, + gr + .getGeolocation() + .stream() + .filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada")) + .count()); + Assertions + .assertEquals( + 1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count()); + Assertions + .assertEquals( + 1, + gr + .getGeolocation() + .stream() + .filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals("")) + .count()); + Assertions + .assertEquals( + 1, + gr + .getGeolocation() + .stream() + .filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals("")) + .count()); Assertions.assertEquals("1024Gb", gr.getSize()); @@ -373,30 +485,30 @@ public class DumpJobTest { } @Test - public void testSoftwareDump(){ + public void testSoftwareDump() { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance") + .getPath(); final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); DumpProducts dump = new DumpProducts(); dump - .run(false, sourcePath, workingDir.toString() + "/result", - communityMapPath, Software.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + .run( + false, sourcePath, workingDir.toString() + "/result", + communityMapPath, Software.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(1, verificationDataset.count()); @@ -412,7 +524,6 @@ public class DumpJobTest { Assertions.assertEquals("perl", gr.getProgrammingLanguage()); - Assertions.assertEquals(null, gr.getContainer()); Assertions.assertEquals(null, gr.getContactperson()); Assertions.assertEquals(null, gr.getContactgroup()); @@ -424,30 +535,30 @@ public class DumpJobTest { } @Test - public void testOrpDump(){ + public void testOrpDump() { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance") + .getPath(); final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); DumpProducts dump = new DumpProducts(); dump - .run(false, sourcePath, workingDir.toString() + "/result", - communityMapPath, OtherResearchProduct.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + .run( + false, sourcePath, workingDir.toString() + "/result", + communityMapPath, OtherResearchProduct.class, + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); Assertions.assertEquals(1, verificationDataset.count()); @@ -466,7 +577,6 @@ public class DumpJobTest { Assertions.assertTrue(gr.getTool().contains("tool1")); Assertions.assertTrue(gr.getTool().contains("tool2")); - Assertions.assertEquals(null, gr.getContainer()); Assertions.assertEquals(null, gr.getDocumentationUrl()); Assertions.assertEquals(null, gr.getCodeRepositoryUrl()); @@ -481,32 +591,33 @@ public class DumpJobTest { public void testPublicationDumpCommunity() throws JsonProcessingException { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") + .getPath(); final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); DumpProducts dump = new DumpProducts(); dump - .run(false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); + .run( + false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); Assertions.assertEquals(1, verificationDataset.count()); Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count()); - //the common fields in the result have been already checked. Now checking only + // the common fields in the result have been already checked. Now checking only // community specific fields CommunityResult cr = verificationDataset.first(); @@ -519,15 +630,20 @@ public class DumpJobTest { Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust()); Assertions.assertEquals(1, cr.getCollectedfrom().size()); - Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey()); + Assertions + .assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey()); Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue()); Assertions.assertEquals(1, cr.getInstance().size()); - Assertions.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getInstance().get(0).getCollectedfrom().getKey()); + Assertions + .assertEquals( + "10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", + cr.getInstance().get(0).getCollectedfrom().getKey()); Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue()); - Assertions.assertEquals("10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey()); - Assertions.assertEquals("One Ecosystem",cr.getInstance().get(0).getHostedby().getValue()); - + Assertions + .assertEquals( + "10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey()); + Assertions.assertEquals("One Ecosystem", cr.getInstance().get(0).getHostedby().getValue()); } @@ -587,8 +703,6 @@ public class DumpJobTest { Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); - - } @Test @@ -650,7 +764,6 @@ public class DumpJobTest { Assertions.assertEquals(0, verificationDataset.count()); - } @Test @@ -718,7 +831,6 @@ public class DumpJobTest { Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count()); - } @Test @@ -814,7 +926,6 @@ public class DumpJobTest { Assertions.assertEquals(23, verificationDataset.count()); - Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count()); verificationDataset.createOrReplaceTempView("check"); @@ -832,7 +943,6 @@ public class DumpJobTest { Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index c950f1c91..a17861ad7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -8,16 +8,17 @@ import java.nio.file.Files; import java.nio.file.Path; import org.apache.commons.io.FileUtils; -import org.apache.neethi.Assertion; + import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.ForeachFunction; + import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; + import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -229,107 +230,98 @@ public class PrepareResultProjectJobTest { public void testMatchValidated() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_validatedRels") + .getPath(); SparkPrepareResultProject.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/preparedInfo", - "-sourcePath", sourcePath + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/preparedInfo", + "-sourcePath", sourcePath }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/preparedInfo") - .map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class)); + .textFile(workingDir.toString() + "/preparedInfo") + .map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); + .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 2); + assertEquals(2, verificationDataset.count() ); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); - String query = "select resultId, MyT.id project , MyT.title title, MyT.acronym acronym , MyT.provenance.provenance provenance, " + - "MyT.validated.validatedByFunder, MyT.validated.validationDate " - + "from dataset " - + "lateral view explode(projectsList) p as MyT "; + String query = "select resultId, MyT.id project , MyT.title title, MyT.acronym acronym , MyT.provenance.provenance provenance, " + + + "MyT.validated.validatedByFunder, MyT.validated.validationDate " + + "from dataset " + + "lateral view explode(projectsList) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(3, resultExplodedProvenance.count()); - Assertions.assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals(3, resultExplodedProvenance.count()); + assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + - "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + - "and validatedByFunder = true " + - "and validationDate = '2021-08-06'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + + "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + + "and validatedByFunder = true " + + "and validationDate = '2021-08-06'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + - "and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " + - "and validatedByFunder = true and validationDate = '2021-08-04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + + "and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " + + "and validatedByFunder = true and validationDate = '2021-08-04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " + - "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + - "and validatedByFunder = true and validationDate = '2021-08-05'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " + + "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + + "and validatedByFunder = true and validationDate = '2021-08-05'") + .count()); - Assertions - .assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 61102232e..9710e4553 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -8,8 +8,6 @@ import java.util.HashMap; import java.util.logging.Filter; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; -import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -31,6 +29,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo; import eu.dnetlib.dhp.schema.dump.oaf.Result; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; +import eu.dnetlib.dhp.schema.dump.oaf.community.Project; public class UpdateProjectInfoTest { @@ -142,26 +142,26 @@ public class UpdateProjectInfoTest { } @Test - public void testValidatedRelation() throws Exception{ + public void testValidatedRelation() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") + .getPath(); SparkUpdateProjectInfo.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-preparedInfoPath", sourcePath + "/preparedInfoValidated", - "-outputPath", workingDir.toString() + "/result", - "-sourcePath", sourcePath + "/publication_extendedmodel" + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-preparedInfoPath", sourcePath + "/preparedInfoValidated", + "-outputPath", workingDir.toString() + "/result", + "-sourcePath", sourcePath + "/publication_extendedmodel" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); verificationDataset.show(false); @@ -169,10 +169,10 @@ public class UpdateProjectInfoTest { verificationDataset.createOrReplaceTempView("dataset"); String query = "select id, MyT.code code, MyT.title title, MyT.funder.name funderName, MyT.funder.shortName funderShortName, " - + - "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream, MyT.validated " - + "from dataset " + - "lateral view explode(projects) p as MyT "; + + + "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream, MyT.validated " + + "from dataset " + + "lateral view explode(projects) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); @@ -180,27 +180,34 @@ public class UpdateProjectInfoTest { resultExplodedProvenance.show(false); Assertions - .assertEquals( - 2, - resultExplodedProvenance.filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2'").count()); + .assertEquals( + 2, + resultExplodedProvenance.filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2'").count()); Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '123455'") - .count()); + .assertEquals( + 1, + resultExplodedProvenance + .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '123455'") + .count()); Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '119027'") - .count()); + .assertEquals( + 1, + resultExplodedProvenance + .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '119027'") + .count()); Project project = verificationDataset - .map((MapFunction) cr -> cr.getProjects().stream().filter(p -> p.getValidated() != null).collect(Collectors.toList()).get(0) - , Encoders.bean(Project.class)).first(); + .map( + (MapFunction) cr -> cr + .getProjects() + .stream() + .filter(p -> p.getValidated() != null) + .collect(Collectors.toList()) + .get(0), + Encoders.bean(Project.class)) + .first(); Assertions.assertTrue(project.getFunder().getName().equals("Academy of Finland")); Assertions.assertTrue(project.getFunder().getShortName().equals("AKA")); @@ -208,18 +215,22 @@ public class UpdateProjectInfoTest { Assertions.assertTrue(project.getFunder().getFundingStream() == null); Assertions.assertTrue(project.getValidated().getValidationDate().equals("2021-08-06")); - project = verificationDataset - .map((MapFunction) cr -> cr.getProjects().stream().filter(p -> p.getValidated() == null).collect(Collectors.toList()).get(0) - , Encoders.bean(Project.class)).first(); - + .map( + (MapFunction) cr -> cr + .getProjects() + .stream() + .filter(p -> p.getValidated() == null) + .collect(Collectors.toList()) + .get(0), + Encoders.bean(Project.class)) + .first(); Assertions.assertTrue(project.getFunder().getName().equals("European Commission")); Assertions.assertTrue(project.getFunder().getShortName().equals("EC")); Assertions.assertTrue(project.getFunder().getJurisdiction().equals("EU")); Assertions.assertTrue(project.getFunder().getFundingStream().equals("H2020")); - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java index a310448a4..fe178795d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java @@ -94,7 +94,8 @@ public class DumpRelationTest { verificationDataset.createOrReplaceTempView("table"); - verificationDataset.foreach((ForeachFunction)r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); Dataset check = spark .sql( @@ -134,13 +135,13 @@ public class DumpRelationTest { public void test2() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated") - .getPath(); + .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated") + .getPath(); SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/relation", + "-sourcePath", sourcePath }); // dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset"); @@ -148,57 +149,58 @@ public class DumpRelationTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); + .textFile(workingDir.toString() + "/relation") + .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); + .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); verificationDataset.createOrReplaceTempView("table"); - verificationDataset.foreach((ForeachFunction)r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + verificationDataset + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); + .sql( + "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " + + + "from table "); Assertions.assertEquals(20, check.filter("name = 'isProvidedBy'").count()); Assertions - .assertEquals( - 20, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); + .assertEquals( + 20, check + .filter( + "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + + "provenance = 'Harvested'") + .count()); Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count()); Assertions - .assertEquals( - 7, check - .filter( - "name = 'isParticipant' and stype = 'organization' and ttype = 'project' " + - "and provenance = 'Harvested'") - .count()); + .assertEquals( + 7, check + .filter( + "name = 'isParticipant' and stype = 'organization' and ttype = 'project' " + + "and provenance = 'Harvested'") + .count()); Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); Assertions - .assertEquals( - 1, check - .filter( - "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + - "and provenance = 'Inferred by OpenAIRE'") - .count()); + .assertEquals( + 1, check + .filter( + "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + + "and provenance = 'Inferred by OpenAIRE'") + .count()); Assertions.assertEquals(2, check.filter("name = 'isProducedBy'").count()); Assertions - .assertEquals( - 2, check - .filter( - "name = 'isProducedBy' and stype = 'project' and ttype = 'result' " + - "and provenance = 'Harvested' and validated = true " + - "and validationDate = '2021-08-06'") - .count()); + .assertEquals( + 2, check + .filter( + "name = 'isProducedBy' and stype = 'project' and ttype = 'result' " + + "and provenance = 'Harvested' and validated = true " + + "and validationDate = '2021-08-06'") + .count()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index 7bb6b9ea9..08fcd49a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -6,7 +6,6 @@ import static org.mockito.Mockito.lenient; import java.util.*; import java.util.function.Consumer; -import eu.dnetlib.dhp.schema.common.ModelSupport; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -14,6 +13,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -529,7 +529,8 @@ class QueryInformationSystemTest { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.execContextRelationQuery(); - queryInformationSystem.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); + queryInformationSystem + .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); Assertions.assertEquals(5, cInfoList.size()); } @@ -540,7 +541,8 @@ class QueryInformationSystemTest { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.execContextRelationQuery(); - queryInformationSystem.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); + queryInformationSystem + .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); cInfoList.forEach(contextInfo -> { switch (contextInfo.getId()) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index ad4ea36c4..477481c08 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -145,8 +145,8 @@ public class SplitPerFunderTest { // CONICYT 0 tmp = sc - .textFile(workingDir.toString() + "/split/CONICYTF") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + .textFile(workingDir.toString() + "/split/CONICYTF") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); Assertions.assertEquals(0, tmp.count()); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 8daf318be..6229ad19b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -84,7 +84,8 @@ public class IndexRecordTransformerTest { @Test public void testForEOSCFutureTraining() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); testRecordTransformation(record); } diff --git a/pom.xml b/pom.xml index 4d3bcee60..4be425779 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.18] + [2.7.19] [4.0.3] [6.0.5] [3.1.6] From e653756e3d855ae7c838afdb8c09b61cfb17d749 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 4 Oct 2021 18:40:07 +0200 Subject: [PATCH 258/287] applied some suggestiond from Sonar Lint --- .../dnetlib/dhp/oa/graph/dump/Constants.java | 27 ++-- .../dhp/oa/graph/dump/DumpProducts.java | 5 +- .../eu/dnetlib/dhp/oa/graph/dump/MakeTar.java | 12 +- .../oa/graph/dump/QueryInformationSystem.java | 4 +- .../dhp/oa/graph/dump/ResultMapper.java | 122 +++++++++--------- .../dhp/oa/graph/dump/SaveCommunityMap.java | 8 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 23 +--- .../graph/dump/community/CommunitySplit.java | 8 +- .../community/SparkPrepareResultProject.java | 6 +- .../dump/complete/CreateContextRelation.java | 3 +- .../dhp/oa/graph/dump/complete/Extractor.java | 43 +++--- .../dhp/oa/graph/dump/complete/Process.java | 5 +- .../dump/complete/QueryInformationSystem.java | 18 ++- .../dump/complete/SparkDumpRelationJob.java | 33 +++-- .../complete/SparkOrganizationRelation.java | 3 +- .../SparkSelectValidRelationsJob.java | 1 - .../dump/exceptions/MyRuntimeException.java | 30 +++++ .../NoAvailableEntityTypeException.java | 29 +++++ .../funderresults/SparkDumpFunderResults.java | 3 - .../SparkResultLinkedToProject.java | 3 - .../dump/PrepareResultProjectJobTest.java | 91 +++++++------ ...DumpOrganizationProjectDatasourceTest.java | 5 +- .../dhp/oa/graph/hostedbymap/TestApply.scala | 2 +- .../complete/organization/organization.json | 4 +- 24 files changed, 274 insertions(+), 214 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/MyRuntimeException.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/NoAvailableEntityTypeException.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java index 6dca09b63..61167639c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java @@ -5,37 +5,40 @@ import java.util.Map; import com.google.common.collect.Maps; +import eu.dnetlib.dhp.schema.common.ModelConstants; + public class Constants { - public static final Map accessRightsCoarMap = Maps.newHashMap(); - public static final Map coarCodeLabelMap = Maps.newHashMap(); + protected static final Map accessRightsCoarMap = Maps.newHashMap(); + protected static final Map coarCodeLabelMap = Maps.newHashMap(); public static final String INFERRED = "Inferred by OpenAIRE"; + public static final String CABF2 = "c_abf2"; public static final String HARVESTED = "Harvested"; public static final String DEFAULT_TRUST = "0.9"; public static final String USER_CLAIM = "Linked by user"; - public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + public static final String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; - public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/"; + public static final String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/"; - public static String RESEARCH_COMMUNITY = "Research Community"; + public static final String RESEARCH_COMMUNITY = "Research Community"; - public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; + public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; static { - accessRightsCoarMap.put("OPEN", "c_abf2"); + accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); - accessRightsCoarMap.put("OPEN SOURCE", "c_abf2"); - accessRightsCoarMap.put("CLOSED", "c_14cb"); - accessRightsCoarMap.put("EMBARGO", "c_f1cf"); + accessRightsCoarMap.put("OPEN SOURCE", CABF2); + accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb"); + accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf"); } static { - coarCodeLabelMap.put("c_abf2", "OPEN"); + coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN); coarCodeLabelMap.put("c_16ec", "RESTRICTED"); - coarCodeLabelMap.put("c_14cb", "CLOSED"); + coarCodeLabelMap.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED); coarCodeLabelMap.put("c_f1cf", "EMBARGO"); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index d09e584a3..68e1e8402 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -17,6 +17,7 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import eu.dnetlib.dhp.schema.oaf.*; /** @@ -66,7 +67,7 @@ public class DumpProducts implements Serializable { private static O execMap(I value, CommunityMap communityMap, - String dumpType) { + String dumpType) throws NoAvailableEntityTypeException { Optional odInfo = Optional.ofNullable(value.getDataInfo()); if (odInfo.isPresent()) { @@ -94,7 +95,7 @@ public class DumpProducts implements Serializable { } return null; }).filter(Objects::nonNull).collect(Collectors.toList()); - if (toDumpFor.size() == 0) { + if (toDumpFor.isEmpty()) { return null; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index 0d2df11fe..b56849a3a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -57,16 +57,16 @@ public class MakeTar implements Serializable { public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit) throws IOException { - RemoteIterator dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath)); + RemoteIterator dirIterator = fileSystem.listLocatedStatus(new Path(inputPath)); - while (dir_iterator.hasNext()) { - LocatedFileStatus fileStatus = dir_iterator.next(); + while (dirIterator.hasNext()) { + LocatedFileStatus fileStatus = dirIterator.next(); Path p = fileStatus.getPath(); - String p_string = p.toString(); - String entity = p_string.substring(p_string.lastIndexOf("/") + 1); + String pathString = p.toString(); + String entity = pathString.substring(pathString.lastIndexOf("/") + 1); - MakeTarArchive.tarMaxSize(fileSystem, p_string, outputPath + "/" + entity, entity, gBperSplit); + MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index 4d0c59955..b972de6e9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -40,10 +40,10 @@ public class QueryInformationSystem { "{$x//CONFIGURATION/context/@label}" + ""; - public CommunityMap getCommunityMap(boolean singleCommunity, String community_id) + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) throws ISLookUpException, DocumentException, SAXException { if (singleCommunity) - return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + community_id + "'"))); + return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'"))); return getMap(isLookUp.quickSearchProfile(XQUERY_ALL)); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 988a8c7a4..0f3192c25 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -6,9 +6,8 @@ import java.util.*; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.*; import eu.dnetlib.dhp.schema.dump.oaf.AccessRight; @@ -29,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.*; public class ResultMapper implements Serializable { public static Result map( - E in, Map communityMap, String dumpType) { + E in, Map communityMap, String dumpType) throws NoAvailableEntityTypeException { Result out; if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { @@ -136,6 +135,8 @@ public class ResultMapper implements Serializable { out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); break; + default: + throw new NoAvailableEntityTypeException(); } Optional> mes = Optional.ofNullable(input.getMeasures()); @@ -156,17 +157,15 @@ public class ResultMapper implements Serializable { // I do not map Access Right UNKNOWN or OTHER Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); - out - .setBestaccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } + if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { + String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); + out + .setBestaccessright( + AccessRight + .newInstance( + code, + Constants.coarCodeLabelMap.get(code), + Constants.COAR_ACCESS_RIGHT_SCHEMA)); } final List contributorList = new ArrayList<>(); @@ -263,7 +262,7 @@ public class ResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setMaintitle(iTitle.get(0).getValue()); } @@ -272,7 +271,7 @@ public class ResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setSubtitle(iTitle.get(0).getValue()); } @@ -301,9 +300,8 @@ public class ResultMapper implements Serializable { Optional .ofNullable(input.getSource()) - .ifPresent(value -> out.setSource(value.stream().map(s -> s.getValue()).collect(Collectors.toList()))); - // value.stream().forEach(s -> sourceList.add(s.getValue()))); - // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); + .ifPresent(value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); + List subjectList = new ArrayList<>(); Optional .ofNullable(input.getSubject()) @@ -334,14 +332,14 @@ public class ResultMapper implements Serializable { value -> value .stream() .map(c -> { - String community_id = c.getId(); - if (community_id.indexOf("::") > 0) { - community_id = community_id.substring(0, community_id.indexOf("::")); + String communityId = c.getId(); + if (communityId.contains("::")) { + communityId = communityId.substring(0, communityId.indexOf("::")); } - if (communities.contains(community_id)) { + if (communities.contains(communityId)) { Context context = new Context(); - context.setCode(community_id); - context.setLabel(communityMap.get(community_id)); + context.setCode(communityId); + context.setLabel(communityMap.get(communityId)); Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); if (dataInfo.isPresent()) { List provenance = new ArrayList<>(); @@ -361,7 +359,11 @@ public class ResultMapper implements Serializable { .filter(Objects::nonNull) .collect(Collectors.toSet())); - context.setProvenance(getUniqueProvenance(provenance)); + try { + context.setProvenance(getUniqueProvenance(provenance)); + } catch (NoAvailableEntityTypeException e) { + e.printStackTrace(); + } } return context; } @@ -371,7 +373,7 @@ public class ResultMapper implements Serializable { .collect(Collectors.toList())) .orElse(new ArrayList<>()); - if (contextList.size() > 0) { + if (!contextList.isEmpty()) { Set hashValue = new HashSet<>(); List remainigContext = new ArrayList<>(); contextList.forEach(c -> { @@ -417,35 +419,34 @@ public class ResultMapper implements Serializable { private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { Optional opAr = Optional.ofNullable(i.getAccessright()); - if (opAr.isPresent()) { - if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); + if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { + String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); - instance - .setAccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - if (opAr.get().getOpenAccessRoute() != null) { - switch (opAr.get().getOpenAccessRoute()) { - case hybrid: - instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid); - break; - case gold: - instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold); - break; - case green: - instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green); - break; - case bronze: - instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze); - break; + instance + .setAccessright( + AccessRight + .newInstance( + code, + Constants.coarCodeLabelMap.get(code), + Constants.COAR_ACCESS_RIGHT_SCHEMA)); + if (opAr.get().getOpenAccessRoute() != null) { + switch (opAr.get().getOpenAccessRoute()) { + case hybrid: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.hybrid); + break; + case gold: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.gold); + break; + case green: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.green); + break; + case bronze: + instance.getAccessright().setOpenAccessRoute(OpenAccessRoute.bronze); + break; - } } } + } Optional @@ -498,7 +499,8 @@ public class ResultMapper implements Serializable { } - private static List getUniqueProvenance(List provenance) { + private static List getUniqueProvenance(List provenance) + throws NoAvailableEntityTypeException { Provenance iProv = new Provenance(); Provenance hProv = new Provenance(); @@ -520,6 +522,8 @@ public class ResultMapper implements Serializable { case Constants.USER_CLAIM: lProv = getHighestTrust(lProv, p); break; + default: + throw new NoAvailableEntityTypeException(); } } @@ -599,19 +603,19 @@ public class ResultMapper implements Serializable { } private static Pid getOrcid(List p) { - List pid_list = p.stream().map(pid -> { + List pidList = p.stream().map(pid -> { if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID) || (pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))) { return pid; } return null; - }).filter(pid -> pid != null).collect(Collectors.toList()); + }).filter(Objects::nonNull).collect(Collectors.toList()); - if (pid_list.size() == 1) { - return getAuthorPid(pid_list.get(0)); + if (pidList.size() == 1) { + return getAuthorPid(pidList.get(0)); } - List orcid = pid_list + List orcid = pidList .stream() .filter( ap -> ap @@ -622,7 +626,7 @@ public class ResultMapper implements Serializable { if (orcid.size() == 1) { return getAuthorPid(orcid.get(0)); } - orcid = pid_list + orcid = pidList .stream() .filter( ap -> ap diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 971324744..1eff99892 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -31,9 +31,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); - private final QueryInformationSystem queryInformationSystem; + private final transient QueryInformationSystem queryInformationSystem; - private final BufferedWriter writer; + private final transient BufferedWriter writer; public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { final Configuration conf = new Configuration(); @@ -84,12 +84,12 @@ public class SaveCommunityMap implements Serializable { } - private void saveCommunityMap(boolean singleCommunity, String community_id) + private void saveCommunityMap(boolean singleCommunity, String communityId) throws ISLookUpException, IOException, DocumentException, SAXException { writer .write( Utils.OBJECT_MAPPER - .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, community_id))); + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId))); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 09e301572..a19a3a5b6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -5,15 +5,13 @@ import java.io.Serializable; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.api.MissingConceptDoiException; import eu.dnetlib.dhp.common.api.ZenodoAPIClient; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; public class SendToZenodoHDFS implements Serializable { @@ -21,8 +19,6 @@ public class SendToZenodoHDFS implements Serializable { private static final String VERSION = "version"; // to be used to upload a new version of a published deposition private static final String UPDATE = "update"; // to upload content to an open deposition not published - private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); - public static void main(final String[] args) throws Exception, MissingConceptDoiException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -48,15 +44,12 @@ public class SendToZenodoHDFS implements Serializable { .orElse(false); final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null); - // final String communityMapPath = parser.get("communityMapPath"); Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); - // CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath); - RemoteIterator fileStatusListIterator = fileSystem .listFiles( new Path(hdfsPath), true); @@ -77,19 +70,17 @@ public class SendToZenodoHDFS implements Serializable { } zenodoApiClient.uploadOpenDeposition(depositionId); break; + default: + throw new NoAvailableEntityTypeException(); } while (fileStatusListIterator.hasNext()) { LocatedFileStatus fileStatus = fileStatusListIterator.next(); Path p = fileStatus.getPath(); - String p_string = p.toString(); - if (!p_string.endsWith("_SUCCESS")) { - String name = p_string.substring(p_string.lastIndexOf("/") + 1); -// log.info("Sending information for community: " + name); -// if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { -// name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar"; -// } + String pString = p.toString(); + if (!pString.endsWith("_SUCCESS")) { + String name = pString.substring(pString.lastIndexOf("/") + 1); FSDataInputStream inputStream = fileSystem.open(p); zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen()); @@ -101,7 +92,7 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } - if (publish) { + if (Boolean.TRUE.equals(publish)) { zenodoApiClient.publish(); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 6abca2255..ee6341f84 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; -import java.util.Set; import java.util.stream.Collectors; import org.apache.spark.SparkConf; @@ -16,6 +15,7 @@ import org.apache.spark.sql.SparkSession; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import eu.dnetlib.dhp.schema.dump.oaf.community.Context; /** * This class splits the dumped results according to the research community - research initiative/infrastructure they @@ -56,10 +56,10 @@ public class CommunitySplit implements Serializable { } private static void printResult(String c, Dataset result, String outputPath) { - Dataset community_products = result + Dataset communityProducts = result .filter((FilterFunction) r -> containsCommunity(r, c)); - community_products + communityProducts .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) @@ -72,7 +72,7 @@ public class CommunitySplit implements Serializable { return r .getContext() .stream() - .map(con -> con.getCode()) + .map(Context::getCode) .collect(Collectors.toList()) .contains(c); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 3508e4126..87ac20948 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -161,7 +161,7 @@ public class SparkPrepareResultProject implements Serializable { provenance.setTrust(di.get().getTrust()); p.setProvenance(provenance); } - if (relation.getValidated()) { + if (Boolean.TRUE.equals(relation.getValidated())) { p.setValidated(Validated.newInstance(relation.getValidated(), relation.getValidationDate())); } return p; @@ -179,8 +179,8 @@ public class SparkPrepareResultProject implements Serializable { f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); for (Object o : doc.selectNodes("//funding_level_0")) { - List node = ((Node) o).selectNodes("./name"); - f.setFundingStream(((Node) node.get(0)).getText()); + List node = ((Node) o).selectNodes("./name"); + f.setFundingStream((node.get(0)).getText()); } return f; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 0c8f5dba4..a468e334d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; import eu.dnetlib.dhp.schema.oaf.Datasource; @@ -120,7 +121,7 @@ public class CreateContextRelation implements Serializable { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); writer.newLine(); } catch (final Exception e) { - throw new RuntimeException(e); + throw new MyRuntimeException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java index 7e8ddfaf8..3e47ee664 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java @@ -62,22 +62,7 @@ public class Extractor implements Serializable { .readPath(spark, inputPath, inputClazz) .flatMap((FlatMapFunction) value -> { List relationList = new ArrayList<>(); - Optional - .ofNullable(value.getInstance()) - .ifPresent(inst -> inst.forEach(instance -> { - Optional - .ofNullable(instance.getCollectedfrom()) - .ifPresent( - cf -> getRelatioPair( - value, relationList, cf, - ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes)); - Optional - .ofNullable(instance.getHostedby()) - .ifPresent( - hb -> getRelatioPair( - value, relationList, hb, - Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes)); - })); + extractRelationsFromInstance(hashCodes, value, relationList); Set communities = communityMap.keySet(); Optional .ofNullable(value.getContext()) @@ -136,8 +121,28 @@ public class Extractor implements Serializable { } + private void extractRelationsFromInstance(Set hashCodes, R value, + List relationList) { + Optional + .ofNullable(value.getInstance()) + .ifPresent(inst -> inst.forEach(instance -> { + Optional + .ofNullable(instance.getCollectedfrom()) + .ifPresent( + cf -> getRelatioPair( + value, relationList, cf, + ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes)); + Optional + .ofNullable(instance.getHostedby()) + .ifPresent( + hb -> getRelatioPair( + value, relationList, hb, + Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes)); + })); + } + private static void getRelatioPair(R value, List relationList, KeyValue cf, - String result_dtasource, String datasource_result, + String resultDatasource, String datasourceResult, Set hashCodes) { Provenance provenance = Optional .ofNullable(cf.getDataInfo()) @@ -162,7 +167,7 @@ public class Extractor implements Serializable { Relation r = getRelation( value.getId(), cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY, - result_dtasource, ModelConstants.PROVISION, + resultDatasource, ModelConstants.PROVISION, provenance); if (!hashCodes.contains(r.hashCode())) { relationList @@ -173,7 +178,7 @@ public class Extractor implements Serializable { r = getRelation( cf.getKey(), value.getId(), Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY, - datasource_result, ModelConstants.PROVISION, + datasourceResult, ModelConstants.PROVISION, provenance); if (!hashCodes.contains(r.hashCode())) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index b0b8b8acd..e1cb0e753 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -9,6 +9,7 @@ import org.apache.commons.lang3.StringUtils; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.dump.oaf.Provenance; @@ -43,7 +44,7 @@ public class Process implements Serializable { return (R) ri; } catch (final Exception e) { - throw new RuntimeException(e); + throw new MyRuntimeException(e); } } @@ -91,7 +92,7 @@ public class Process implements Serializable { return relationList; } catch (final Exception e) { - throw new RuntimeException(e); + throw new MyRuntimeException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index 88c92da53..fcc4bf39c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -13,7 +13,6 @@ import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; import org.xml.sax.SAXException; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -94,9 +93,9 @@ public class QueryInformationSystem { Element root = doc.getRootElement(); cinfo.setId(root.attributeValue("id")); - Iterator it = root.elementIterator(); + Iterator it = root.elementIterator(); while (it.hasNext()) { - Element el = (Element) it.next(); + Element el = it.next(); if (el.getName().equals("category")) { String categoryId = el.attributeValue("id"); categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2); @@ -143,7 +142,7 @@ public class QueryInformationSystem { if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) { return null; } - String funder = null; + String funder = ""; String grantId = null; String funding = null; for (Object node : el.selectNodes(".//param")) { @@ -158,9 +157,12 @@ public class QueryInformationSystem { case "CD_PROJECT_NUMBER": grantId = n.getText(); break; + default: + break; } } String nsp = null; + switch (funder.toLowerCase()) { case "ec": if (funding == null) { @@ -179,10 +181,12 @@ public class QueryInformationSystem { nsp = "dfgf________::"; break; default: - nsp = funder.toLowerCase(); + StringBuilder bld = new StringBuilder(); + bld.append(funder.toLowerCase()); for (int i = funder.length(); i < 12; i++) - nsp += "_"; - nsp += "::"; + bld.append("_"); + bld.append("::"); + nsp = bld.toString(); } return prefix + "|" + nsp + DHPUtils.md5(grantId); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index ab7e7e595..ddfd6592f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -71,22 +71,22 @@ public class SparkDumpRelationJob implements Serializable { Dataset relations = Utils.readPath(spark, inputPath, Relation.class); relations .map((MapFunction) relation -> { - eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel_new = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); - rel_new + eu.dnetlib.dhp.schema.dump.oaf.graph.Relation relNew = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation(); + relNew .setSource( Node .newInstance( relation.getSource(), ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)))); - rel_new + relNew .setTarget( Node .newInstance( relation.getTarget(), ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)))); - rel_new + relNew .setReltype( RelType .newInstance( @@ -96,23 +96,22 @@ public class SparkDumpRelationJob implements Serializable { Optional odInfo = Optional.ofNullable(relation.getDataInfo()); if (odInfo.isPresent()) { DataInfo dInfo = odInfo.get(); - if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent()) { - if (Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { - rel_new - .setProvenance( - Provenance - .newInstance( - dInfo.getProvenanceaction().getClassname(), - dInfo.getTrust())); - } + if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() && + Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { + relNew + .setProvenance( + Provenance + .newInstance( + dInfo.getProvenanceaction().getClassname(), + dInfo.getTrust())); } } - if (relation.getValidated()) { - rel_new.setValidated(relation.getValidated()); - rel_new.setValidationDate(relation.getValidationDate()); + if (Boolean.TRUE.equals(relation.getValidated())) { + relNew.setValidated(relation.getValidated()); + relNew.setValidationDate(relation.getValidationDate()); } - return rel_new; + return relNew; }, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 4365e861f..f9d2123e2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -58,7 +58,8 @@ public class SparkOrganizationRelation implements Serializable { final OrganizationMap organizationMap = new Gson() .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class); - log.info("organization map : {}", new Gson().toJson(organizationMap)); + final String serializedOrganizationMap = new Gson().toJson(organizationMap); + log.info("organization map : {}", serializedOrganizationMap); final String communityMapPath = parser.get("communityMapPath"); log.info("communityMapPath: {}", communityMapPath); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java index e729e13df..20f3fc4a7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java @@ -131,7 +131,6 @@ public class SparkSelectValidRelationsJob implements Serializable { .option("compression", "gzip") .mode(SaveMode.Overwrite) .json(outputPath); - ; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/MyRuntimeException.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/MyRuntimeException.java new file mode 100644 index 000000000..e325c4334 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/MyRuntimeException.java @@ -0,0 +1,30 @@ + +package eu.dnetlib.dhp.oa.graph.dump.exceptions; + +public class MyRuntimeException extends RuntimeException { + + public MyRuntimeException() { + super(); + } + + public MyRuntimeException( + final String message, + final Throwable cause, + final boolean enableSuppression, + final boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + public MyRuntimeException(final String message, final Throwable cause) { + super(message, cause); + } + + public MyRuntimeException(final String message) { + super(message); + } + + public MyRuntimeException(final Throwable cause) { + super(cause); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/NoAvailableEntityTypeException.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/NoAvailableEntityTypeException.java new file mode 100644 index 000000000..f3fbf740a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/exceptions/NoAvailableEntityTypeException.java @@ -0,0 +1,29 @@ + +package eu.dnetlib.dhp.oa.graph.dump.exceptions; + +public class NoAvailableEntityTypeException extends Exception { + public NoAvailableEntityTypeException() { + super(); + } + + public NoAvailableEntityTypeException( + final String message, + final Throwable cause, + final boolean enableSuppression, + final boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + public NoAvailableEntityTypeException(final String message, final Throwable cause) { + super(message, cause); + } + + public NoAvailableEntityTypeException(final String message) { + super(message); + } + + public NoAvailableEntityTypeException(final Throwable cause) { + super(cause); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 9cddba3c5..6aed9d7a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -10,7 +10,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; import org.slf4j.Logger; @@ -18,10 +17,8 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; -import eu.dnetlib.dhp.schema.oaf.Relation; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 156ae6d63..d89b9e86d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -10,7 +10,6 @@ import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -19,13 +18,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; -import scala.Tuple2; /** * Selects the results linked to projects. Only for these results the dump will be performed. diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index a17861ad7..86d2f93c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -8,17 +8,14 @@ import java.nio.file.Files; import java.nio.file.Path; import org.apache.commons.io.FileUtils; - import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; - import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; - import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -248,14 +245,14 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - assertEquals(2, verificationDataset.count() ); + assertEquals(2, verificationDataset.count()); assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); @@ -269,59 +266,59 @@ public class PrepareResultProjectJobTest { assertEquals(3, resultExplodedProvenance.count()); assertEquals(3, resultExplodedProvenance.filter("validatedByFunder = true").count()); assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); assertEquals( - 2, - resultExplodedProvenance - .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + - "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + - "and validatedByFunder = true " + - "and validationDate = '2021-08-06'") - .count()); + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + + "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + + "and validatedByFunder = true " + + "and validationDate = '2021-08-06'") + .count()); assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + - "and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " + - "and validatedByFunder = true and validationDate = '2021-08-04'") - .count()); + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' " + + "and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' " + + "and validatedByFunder = true and validationDate = '2021-08-04'") + .count()); assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " + - "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + - "and validatedByFunder = true and validationDate = '2021-08-05'") - .count()); + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' " + + "and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' " + + "and validatedByFunder = true and validationDate = '2021-08-05'") + .count()); assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java index 89ecdfb2b..051141e18 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java @@ -6,6 +6,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -98,7 +99,7 @@ public class DumpOrganizationProjectDatasourceTest { } @Test - public void dumpProjectTest() { + public void dumpProjectTest() throws NoAvailableEntityTypeException { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project") @@ -127,7 +128,7 @@ public class DumpOrganizationProjectDatasourceTest { } @Test - public void dumpDatasourceTest() { + public void dumpDatasourceTest() throws NoAvailableEntityTypeException { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource") .getPath(); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index 1bdcb60aa..4613d5636 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -54,7 +54,7 @@ class TestApply extends java.io.Serializable{ assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.gold)) assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/complete/organization/organization.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/complete/organization/organization.json index d172419bf..e141af787 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/complete/organization/organization.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/complete/organization/organization.json @@ -1,6 +1,5 @@ {"alternativeNames":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"한국초등도덕교육학회"}],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"KR","classname":"Korea (Republic of)","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","lastupdatetimestamp":1566902414749,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korean Elementary Moral Education Society"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Korean Elementary Moral Education Society"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.496778.3"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.ethics.or.kr/"}} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33","value":"NIH - National Institutes of Health"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2016-07-12","dateoftransformation":"2018-09-13","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"id":"20|nih_________::ffcd387c4ca1e9f9b60a398123e45904","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"CROSSPATHS MANAGEMENT SYSTEMS, INC."},"originalId":["nih_________::CROSSPATHS_MANAGEMENT_SYSTEMS__INC."],"pid":[]} -{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"CA","classname":"Canada","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413","lastupdatetimestamp":1566902407153,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"AHS - Stollery Children's Hospital"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Stollery Children's Hospital"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.416656.6"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"https://www.stollerykids.com/"}} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"CZ","classname":"Czech Republic","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"extraInfo":[],"id":"20|corda__h2020::56fd6f1eda222f51050b1ad488e1362a","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"INSTITUT MIKROELEKTRONICKYCH APLIKACI S.R.O."},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"IMA"},"originalId":["corda__h2020::999697424"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"http://www.ima.cz"}} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"MK","classname":"Former Yugoslav Republic of Macedonia","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2020-03-31","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"extraInfo":[],"id":"20|corda__h2020::45032111512cb108a1c3518ec100848c","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"MARSECO DOO TETOVO"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"ORGANIZATION FORSERVICES AND INFORMATION TECHNOLOGIES MARSECO LTD TETOVO"},"originalId":["corda__h2020::901028345"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"http://www.marseco.mk"}} {"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"US","classname":"United States","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::c091a5e74444b017cf897f35a56afd7f","lastupdatetimestamp":1566902407729,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"SatCon Technology Corporation (United States)"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"SatCon Technology Corporation (United States)"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.421895.3"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"http://www.satcon.com/en/home"}} @@ -31,4 +30,5 @@ {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33","value":"NIH - National Institutes of Health"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"dateofcollection":"2016-07-12","dateoftransformation":"2018-09-13","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"false"},"extraInfo":[],"id":"20|nih_________::08c33856ae8a471ac99a35a602f2e206","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"value":"LOFSTRAND LABS, LTD."},"originalId":["nih_________::LOFSTRAND_LABS__LTD."],"pid":[]} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"ES","classname":"Spain","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2020-05-16","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"extraInfo":[],"id":"20|corda__h2020::94ca0fe4f08bd80fce004ec642cbad58","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"INDUKERN,S.A."},"originalId":["corda__h2020::907568376"],"pid":[],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"http://www.indukern.es"}} {"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:actionset","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"extraInfo":[],"id":"20|microsoft___::fa306d92ad4c578bcdf0007d41e9aaf5","legalname":{"value":"Chiyoda Corporation"},"originalId":["147718715"],"pid":[],"websiteurl":{"value":"http://www.chiyoda-corp.com/en/index.html"}} -{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"NO","classname":"Norway","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"extraInfo":[],"id":"20|corda__h2020::9078c2ab5222c914200a52d00ffd5671","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"ERVIK & SAEVIK AS"},"originalId":["corda__h2020::920030257"],"pid":[]} \ No newline at end of file +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::a55eb91348674d853191f4f4fd73d078","value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020"}],"country":{"classid":"NO","classname":"Norway","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"dateofcollection":"2018-03-12","dateoftransformation":"2020-06-20","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"true"},"extraInfo":[],"id":"20|corda__h2020::9078c2ab5222c914200a52d00ffd5671","lastupdatetimestamp":1594398578323,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810000002384185791"},"value":"ERVIK & SAEVIK AS"},"originalId":["corda__h2020::920030257"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"key":"10|openaire____::ff4a008470319a22d9cf3d14af485977","value":"GRID - Global Research Identifier Database"}],"country":{"classid":"CA","classname":"Canada","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.91"},"dateofcollection":"","dateoftransformation":"","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"extraInfo":[],"id":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413","lastupdatetimestamp":1566902407153,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"AHS - Stollery Children's Hospital"},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"Stollery Children's Hospital"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":""},"originalId":[],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"qualifier":{"classid":"grid","classname":"grid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"grid.416656.6"}],"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""},"trust":""},"value":"https://www.stollerykids.com/"}} \ No newline at end of file From 991b06bd0b03c867bdd429b9b70adff8b1b6ab89 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 5 Oct 2021 10:21:33 +0200 Subject: [PATCH 259/287] removed generation of EBI links from old dump, now EBI link dump is created by another wf --- .../eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala | 5 ----- .../eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala index f14e5f264..1924d919e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala @@ -32,14 +32,9 @@ object SparkEBILinksToOaf { import spark.implicits._ implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem] - - ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset") - val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null) ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links)) - .repartition(4000) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) .write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml index 3f442c5c6..1fa099ebc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml @@ -74,7 +74,7 @@ yarn-cluster cluster - Create Baselnie DataSet + Create Baseline DataSet eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates dhp-graph-mapper-${projectVersion}.jar From b84e0cabeba15fe5ba3de269aab8b47ac953ce10 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 5 Oct 2021 16:34:47 +0200 Subject: [PATCH 260/287] Implemented new method for update baseline --- .../sx/graph/ebi/SparkDownloadEBILinks.scala | 100 +++++++++++++----- .../sx/graph/bio/pubmed/BioScholixTest.scala | 8 ++ 2 files changed, 79 insertions(+), 29 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala index 08e060459..eda825bd0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -4,6 +4,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.{HttpGet, HttpUriRequest} import org.apache.http.impl.client.HttpClientBuilder @@ -25,38 +27,78 @@ object SparkDownloadEBILinks { } - def requestLinks(PMID:Long):String = { - val r = new HttpGet(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") - val timeout = 60; // seconds - val config = RequestConfig.custom() - .setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() - val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() - try { - var tries = 4 - while (tries > 0) { - println(s"requesting ${r.getURI}") - try { - val response = client.execute(r) - println(s"get response with status${response.getStatusLine.getStatusCode}") - if (response.getStatusLine.getStatusCode > 400) { - tries -= 1 - } - else - return IOUtils.toString(response.getEntity.getContent) - } catch { - case e: Throwable => - println(s"Error on requesting ${r.getURI}") - e.printStackTrace() - tries -= 1 + def requestPage(url:String):String = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 } - "" - } finally { - if (client != null) - client.close() } + "" + } finally { + if (client != null) + client.close() + } + } + + + def requestBaseLineUpdatePage():List[String] = { + val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") + + val result =data.lines.filter(l => l.startsWith("") + val start = l.indexOf("= 0 && end >start) + l.substring(start+9, (end-start)) + else + "" + }.filter(s =>s.endsWith(".gz") ).map(s => s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s").toList + + result + } + + def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { + + + val conf = new Configuration + conf.set("fs.defaultFS", hdfsServerUri) + val fs = FileSystem.get(conf) + val p = new Path((baselinePath)) + val files = fs.listFiles(p,false) + + while (files.hasNext) { + val c = files.next() + c.getPath + + } + + + } + + + def requestLinks(PMID:Long):String = { + requestPage(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") } def main(args: Array[String]): Unit = { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index 8e063db7c..b6058f71b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF import eu.dnetlib.dhp.sx.graph.bio.pubmed.PubMedToOaf.dataInfo +import eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -50,6 +51,13 @@ class BioScholixTest extends AbstractVocabularyTest{ } + @Test + def testDownloadEBIUpdate() = { + val data = SparkDownloadEBILinks.requestBaseLineUpdatePage() + println(data) + } + + @Test def testEBIData() = { val inputXML = Source.fromInputStream(getClass.getResourceAsStream("pubmed.xml")).mkString From 2557bb41f54cbe8232372bfa1c554ce5d3177cbd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 6 Oct 2021 16:41:08 +0200 Subject: [PATCH 261/287] Implemented new method for update baseline inside scala node --- .../ebi/SparkCreateBaselineDataFrame.scala | 123 +- .../sx/graph/ebi/SparkDownloadEBILinks.scala | 45 +- .../sx/graph/ebi/baseline_to_oaf_params.json | 9 +- .../dhp/sx/graph/ebi/ebi_download_update.json | 6 +- .../dhp/sx/graph/ebi/oozie_app/workflow.xml | 2 +- .../graph/ebi/update/oozie_app/workflow.xml | 116 +- .../sx/graph/bio/pubmed/BioScholixTest.scala | 7 +- .../dnetlib/dhp/sx/graph/bio/pubmed/ls_result | 1433 +++++++++++++++++ 8 files changed, 1625 insertions(+), 116 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala index 26efd723f..ee76cf8ad 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala @@ -6,18 +6,130 @@ import eu.dnetlib.dhp.schema.oaf.Result import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} +import org.apache.http.client.config.RequestConfig +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} +import java.io.InputStream import scala.io.Source import scala.xml.pull.XMLEventReader object SparkCreateBaselineDataFrame { + def requestBaseLineUpdatePage(maxFile:String):List[(String,String)] = { + val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") + + val result =data.lines.filter(l => l.startsWith("") + val start = l.indexOf("= 0 && end >start) + l.substring(start+9, (end-start)) + else + "" + }.filter(s =>s.endsWith(".gz") ).filter(s => s > maxFile).map(s => (s,s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList + + result + } + + + def downloadBaselinePart(url:String):InputStream = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + response.getEntity.getContent + + } + + def requestPage(url:String):String = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 + } + } + "" + } finally { + if (client != null) + client.close() + } + } + + + + + + + def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { + + + val conf = new Configuration + conf.set("fs.defaultFS", hdfsServerUri) + val fs = FileSystem.get(conf) + val p = new Path(baselinePath) + val files = fs.listFiles(p,false) + var max_file = "" + while (files.hasNext) { + val c = files.next() + val data = c.getPath.toString + val fileName = data.substring(data.lastIndexOf("/")+1) + + if (fileName> max_file) + max_file = fileName + } + + val files_to_download = requestBaseLineUpdatePage(max_file) + + files_to_download.foreach { u => + val hdfsWritePath: Path = new Path(s"$baselinePath/${u._1}") + val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true) + val i = downloadBaselinePart(u._2) + val buffer = Array.fill[Byte](1024)(0) + while(i.read(buffer)>0) { + fsDataOutputStream.write(buffer) + } + i.close() + println(s"Downloaded ${u._2} into $baselinePath/${u._1}") + fsDataOutputStream.close() + } + + } + + val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { override def zero: PMArticle = new PMArticle @@ -51,6 +163,10 @@ object SparkCreateBaselineDataFrame { val targetPath = parser.get("targetPath") log.info("targetPath: {}", targetPath) + val hdfsServerUri = parser.get("hdfsServerUri") + log.info("hdfsServerUri: {}", targetPath) + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) val spark: SparkSession = @@ -61,16 +177,15 @@ object SparkCreateBaselineDataFrame { .master(parser.get("master")).getOrCreate() import spark.implicits._ - val sc = spark.sparkContext - - implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) + val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline",2000) val ds:Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i =>{ val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) @@ -87,7 +202,5 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] .filter(p => p!= null) .write.mode(SaveMode.Overwrite).save(targetPath) - - //s"$workingPath/oaf/baseline_oaf" } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala index eda825bd0..e940fdff0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -4,20 +4,16 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.http.client.config.RequestConfig -import org.apache.http.client.methods.{HttpGet, HttpUriRequest} +import org.apache.http.client.methods.HttpGet import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.SparkConf -import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions.max -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkDownloadEBILinks { - def createEBILinks(pmid:Long):EBILinkItem = { val res = requestLinks(pmid) @@ -26,7 +22,6 @@ object SparkDownloadEBILinks { null } - def requestPage(url:String):String = { val r = new HttpGet(url) val timeout = 60; // seconds @@ -61,42 +56,6 @@ object SparkDownloadEBILinks { } } - - def requestBaseLineUpdatePage():List[String] = { - val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - - val result =data.lines.filter(l => l.startsWith("") - val start = l.indexOf("= 0 && end >start) - l.substring(start+9, (end-start)) - else - "" - }.filter(s =>s.endsWith(".gz") ).map(s => s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s").toList - - result - } - - def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { - - - val conf = new Configuration - conf.set("fs.defaultFS", hdfsServerUri) - val fs = FileSystem.get(conf) - val p = new Path((baselinePath)) - val files = fs.listFiles(p,false) - - while (files.hasNext) { - val c = files.next() - c.getPath - - } - - - } - - def requestLinks(PMID:Long):String = { requestPage(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json index 38eb50094..4bee770bd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json @@ -1,6 +1,7 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"i", "paramLongName":"isLookupUrl","paramDescription": "isLookupUrl", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the oaf path ", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json index 0ae19234a..0860ed558 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json @@ -1,5 +1,5 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the working path ", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml index 1fa099ebc..7612321c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml @@ -25,7 +25,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - yarn-cluster @@ -43,6 +42,7 @@ --workingPath${workingPath} --masteryarn + --hdfsServerUri${nameNode} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml index 1b738caed..cd3bb8c71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml @@ -1,59 +1,67 @@ - - - - sourcePath - the Working Path - - - workingPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - + + + + sourcePath + the Working Path + + + workingPath + the Working Path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + - + - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + - - - yarn-cluster - cluster - Incremental Download EBI Links - eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --workingPath${workingPath} - --masteryarn - - - - - - - \ No newline at end of file + + + yarn-cluster + cluster + Incremental Download EBI Links + eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingPath} + --masteryarn + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index b6058f71b..87279eb21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -51,12 +51,7 @@ class BioScholixTest extends AbstractVocabularyTest{ } - @Test - def testDownloadEBIUpdate() = { - val data = SparkDownloadEBILinks.requestBaseLineUpdatePage() - println(data) - } - + @Test def testEBIData() = { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result new file mode 100644 index 000000000..98a0841c4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result @@ -0,0 +1,1433 @@ +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0001.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0002.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0003.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0004.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0005.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0006.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0007.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0008.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0009.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0010.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0011.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0012.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0013.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0014.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0015.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0016.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0017.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0018.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0019.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0020.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0021.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0022.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0023.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0024.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0025.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0026.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0027.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0028.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0029.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0030.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0031.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0032.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0033.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0034.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0035.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0036.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0037.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0038.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0039.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0040.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0041.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0042.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0043.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0044.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0045.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0046.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0047.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0048.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0049.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0050.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0051.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0052.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0053.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0054.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0055.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0056.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0057.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0058.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0059.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0060.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0061.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0062.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0063.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0064.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0065.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0066.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0067.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0068.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0069.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0070.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0071.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0072.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0073.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0074.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0075.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0076.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0077.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0078.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0079.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0080.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0081.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0082.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0083.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0084.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0085.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0086.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0087.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0088.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0089.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0090.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0091.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0092.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0093.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0094.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0095.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0096.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0097.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0098.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0099.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0100.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0101.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0102.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0103.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0104.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0105.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0106.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0107.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0108.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0109.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0110.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0111.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0112.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0113.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0114.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0115.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0116.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0117.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0118.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0119.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0120.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0121.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0122.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0123.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0124.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0125.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0126.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0127.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0128.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0129.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0130.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0131.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0132.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0133.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0134.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0135.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0136.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0137.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0138.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0139.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0140.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0141.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0142.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0143.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0144.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0145.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0146.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0147.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0148.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0149.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0150.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0151.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0152.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0153.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0154.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0155.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0156.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0157.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0158.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0159.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0160.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0161.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0162.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0163.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0164.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0165.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0166.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0167.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0168.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0169.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0170.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0171.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0172.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0173.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0174.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0175.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0176.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0177.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0178.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0179.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0180.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0181.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0182.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0183.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0184.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0185.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0186.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0187.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0188.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0189.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0190.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0191.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0192.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0193.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0194.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0195.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0196.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0197.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0198.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0199.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0200.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0201.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0202.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0203.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0204.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0205.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0206.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0207.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0208.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0209.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0210.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0211.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0212.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0213.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0214.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0215.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0216.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0217.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0218.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0219.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0220.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0221.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0222.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0223.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0224.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0225.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0226.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0227.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0228.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0229.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0230.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0231.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0232.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0233.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0234.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0235.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0236.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0237.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0238.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0239.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0240.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0241.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0242.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0243.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0244.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0245.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0246.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0247.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0248.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0249.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0250.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0251.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0252.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0253.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0254.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0255.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0256.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0257.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0258.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0259.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0260.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0261.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0262.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0263.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0264.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0265.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0266.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0267.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0268.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0269.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0270.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0271.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0272.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0273.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0274.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0275.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0276.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0277.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0278.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0279.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0280.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0281.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0282.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0283.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0284.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0285.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0286.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0287.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0288.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0289.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0290.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0291.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0292.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0293.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0294.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0295.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0296.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0297.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0298.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0299.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0300.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0301.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0302.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0303.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0304.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0305.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0306.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0307.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0308.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0309.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0310.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0311.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0312.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0313.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0314.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0315.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0316.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0317.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0318.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0319.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0320.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0321.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0322.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0323.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0324.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0325.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0326.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0327.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0328.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0329.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0330.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0331.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0332.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0333.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0334.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0335.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0336.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0337.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0338.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0339.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0340.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0341.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0342.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0343.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0344.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0345.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0346.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0347.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0348.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0349.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0350.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0351.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0352.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0353.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0354.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0355.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0356.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0357.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0358.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0359.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0360.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0361.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0362.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0363.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0364.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0365.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0366.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0367.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0368.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0369.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0370.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0371.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0372.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0373.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0374.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0375.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0376.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0377.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0378.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0379.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0380.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0381.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0382.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0383.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0384.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0385.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0386.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0387.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0388.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0389.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0390.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0391.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0392.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0393.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0394.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0395.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0396.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0397.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0398.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0399.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0400.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0401.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0402.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0403.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0404.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0405.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0406.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0407.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0408.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0409.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0410.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0411.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0412.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0413.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0414.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0415.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0416.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0417.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0418.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0419.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0420.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0421.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0422.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0423.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0424.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0425.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0426.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0427.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0428.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0429.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0430.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0431.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0432.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0433.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0434.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0435.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0436.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0437.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0438.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0439.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0440.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0441.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0442.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0443.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0444.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0445.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0446.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0447.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0448.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0449.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0450.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0451.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0452.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0453.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0454.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0455.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0456.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0457.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0458.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0459.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0460.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0461.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0462.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0463.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0464.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0465.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0466.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0467.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0468.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0469.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0470.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0471.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0472.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0473.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0474.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0475.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0476.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0477.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0478.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0479.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0480.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0481.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0482.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0483.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0484.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0485.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0486.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0487.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0488.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0489.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0490.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0491.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0492.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0493.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0494.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0495.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0496.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0497.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0498.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0499.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0500.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0501.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0502.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0503.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0504.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0505.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0506.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0507.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0508.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0509.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0510.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0511.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0512.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0513.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0514.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0515.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0516.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0517.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0518.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0519.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0520.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0521.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0522.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0523.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0524.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0525.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0526.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0527.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0528.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0529.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0530.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0531.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0532.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0533.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0534.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0535.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0536.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0537.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0538.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0539.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0540.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0541.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0542.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0543.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0544.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0545.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0546.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0547.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0548.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0549.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0550.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0551.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0552.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0553.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0554.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0555.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0556.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0557.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0558.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0559.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0560.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0561.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0562.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0563.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0564.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0565.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0566.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0567.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0568.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0569.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0570.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0571.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0572.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0573.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0574.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0575.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0576.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0577.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0578.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0579.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0580.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0581.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0582.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0583.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0584.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0585.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0586.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0587.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0588.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0589.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0590.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0591.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0592.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0593.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0594.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0595.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0596.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0597.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0598.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0599.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0600.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0601.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0602.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0603.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0604.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0605.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0606.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0607.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0608.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0609.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0610.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0611.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0612.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0613.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0614.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0615.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0616.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0617.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0618.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0619.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0620.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0621.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0622.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0623.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0624.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0625.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0626.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0627.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0628.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0629.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0630.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0631.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0632.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0633.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0634.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0635.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0636.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0637.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0638.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0639.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0640.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0641.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0642.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0643.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0644.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0645.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0646.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0647.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0648.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0649.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0650.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0651.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0652.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0653.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0654.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0655.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0656.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0657.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0658.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0659.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0660.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0661.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0662.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0663.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0664.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0665.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0666.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0667.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0668.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0669.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0670.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0671.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0672.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0673.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0674.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0675.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0676.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0677.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0678.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0679.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0680.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0681.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0682.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0683.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0684.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0685.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0686.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0687.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0688.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0689.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0690.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0691.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0692.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0693.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0694.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0695.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0696.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0697.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0698.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0699.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0700.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0701.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0702.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0703.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0704.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0705.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0706.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0707.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0708.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0709.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0710.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0711.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0712.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0713.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0714.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0715.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0716.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0717.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0718.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0719.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0720.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0721.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0722.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0723.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0724.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0725.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0726.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0727.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0728.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0729.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0730.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0731.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0732.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0733.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0734.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0735.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0736.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0737.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0738.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0739.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0740.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0741.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0742.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0743.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0744.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0745.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0746.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0747.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0748.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0749.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0750.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0751.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0752.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0753.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0754.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0755.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0756.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0757.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0758.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0759.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0760.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0761.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0762.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0763.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0764.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0765.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0766.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0767.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0768.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0769.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0770.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0771.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0772.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0773.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0774.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0775.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0776.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0777.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0778.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0779.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0780.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0781.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0782.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0783.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0784.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0785.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0786.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0787.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0788.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0789.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0790.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0791.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0792.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0793.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0794.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0795.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0796.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0797.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0798.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0799.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0800.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0801.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0802.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0803.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0804.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0805.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0806.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0807.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0808.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0809.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0810.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0811.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0812.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0813.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0814.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0815.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0816.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0817.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0818.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0819.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0820.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0821.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0822.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0823.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0824.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0825.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0826.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0827.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0828.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0829.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0830.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0831.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0832.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0833.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0834.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0835.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0836.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0837.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0838.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0839.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0840.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0841.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0842.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0843.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0844.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0845.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0846.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0847.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0848.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0849.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0850.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0851.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0852.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0853.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0854.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0855.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0856.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0857.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0858.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0859.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0860.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0861.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0862.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0863.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0864.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0865.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0866.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0867.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0868.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0869.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0870.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0871.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0872.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0873.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0874.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0875.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0876.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0877.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0878.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0879.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0880.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0881.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0882.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0883.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0884.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0885.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0886.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0887.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0888.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0889.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0890.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0891.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0892.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0893.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0894.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0895.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0896.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0897.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0898.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0899.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0900.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0901.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0902.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0903.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0904.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0905.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0906.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0907.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0908.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0909.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0910.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0911.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0912.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0913.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0914.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0915.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0916.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0917.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0918.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0919.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0920.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0921.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0922.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0923.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0924.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0925.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0926.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0927.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0928.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0929.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0930.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0931.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0932.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0933.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0934.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0935.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0936.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0937.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0938.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0939.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0940.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0941.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0942.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0943.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0944.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0945.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0946.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0947.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0948.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0949.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0950.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0951.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0952.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0953.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0954.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0955.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0956.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0957.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0958.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0959.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0960.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0961.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0962.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0963.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0964.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0965.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0966.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0967.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0968.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0969.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0970.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0971.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0972.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0973.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0974.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0975.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0976.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0977.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0978.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0979.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0980.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0981.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0982.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0983.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0984.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0985.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0986.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0987.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0988.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0989.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0990.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0991.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0992.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0993.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0994.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0995.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0996.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0997.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0998.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0999.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1000.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1001.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1002.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1003.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1004.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1005.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1006.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1007.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1008.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1009.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1010.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1011.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1012.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1013.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1014.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1015.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1016.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1017.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1018.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1019.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1020.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1021.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1022.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1023.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1024.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1025.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1026.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1027.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1028.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1029.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1030.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1031.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1032.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1033.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1034.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1035.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1036.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1037.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1038.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1039.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1040.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1041.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1042.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1043.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1044.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1045.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1046.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1047.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1048.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1049.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1050.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1051.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1052.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1053.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1054.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1055.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1056.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1057.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1058.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1059.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1060.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1061.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1062.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1063.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1064.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1065.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1066.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1067.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1068.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1069.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1070.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1071.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1072.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1073.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1074.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1075.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1076.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1077.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1078.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1079.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1080.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1081.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1082.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1083.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1084.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1085.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1086.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1087.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1088.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1089.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1090.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1091.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1092.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1093.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1094.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1095.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1096.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1097.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1098.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1099.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1100.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1101.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1102.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1103.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1104.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1105.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1106.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1107.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1108.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1109.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1110.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1111.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1112.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1113.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1114.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1115.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1116.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1117.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1118.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1119.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1120.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1121.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1122.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1123.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1124.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1125.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1126.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1127.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1128.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1129.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1130.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1131.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1132.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1133.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1134.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1135.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1136.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1137.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1138.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1139.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1140.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1141.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1142.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1143.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1144.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1145.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1146.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1147.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1148.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1149.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1150.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1151.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1152.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1153.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1154.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1155.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1156.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1157.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1158.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1159.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1160.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1161.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1162.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1163.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1164.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1165.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1166.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1167.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1168.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1169.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1170.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1171.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1172.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1173.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1174.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1175.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1176.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1177.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1178.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1179.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1180.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1181.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1182.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1183.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1184.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1185.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1186.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1187.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1188.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1189.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1190.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1191.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1192.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1193.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1194.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1195.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1196.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1197.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1198.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1199.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1200.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1201.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1202.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1203.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1204.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1205.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1206.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1207.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1208.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1209.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1210.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1211.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1212.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1213.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1214.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1215.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1216.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1217.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1218.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1219.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1220.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1221.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1222.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1223.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1224.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1225.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1226.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1227.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1228.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1229.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1230.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1231.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1232.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1233.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1234.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1235.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1236.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1237.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1238.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1239.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1240.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1241.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1242.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1243.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1244.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1245.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1246.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1247.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1248.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1249.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1250.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1251.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1252.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1253.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1254.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1255.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1256.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1257.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1258.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1259.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1260.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1261.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1262.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1263.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1264.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1265.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1266.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1267.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1268.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1269.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1270.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1271.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1272.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1273.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1274.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1275.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1276.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1277.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1278.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1279.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1280.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1281.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1282.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1283.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1284.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1285.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1286.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1287.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1288.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1289.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1290.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1291.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1292.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1293.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1294.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1295.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1296.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1297.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1298.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1299.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1300.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1301.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1302.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1303.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1304.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1305.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1306.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1307.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1308.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1309.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1310.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1311.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1312.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1313.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1314.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1315.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1316.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1317.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1318.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1319.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1320.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1321.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1322.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1323.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1324.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1325.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1326.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1327.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1328.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1329.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1330.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1331.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1332.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1333.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1334.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1335.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1336.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1337.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1338.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1339.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1340.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1341.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1342.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1343.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1344.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1345.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1346.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1347.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1348.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1349.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1350.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1351.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1352.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1353.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1354.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1355.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1356.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1357.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1358.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1359.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1360.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1361.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1362.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1363.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1364.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1365.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1366.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1367.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1368.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1369.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1370.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1371.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1372.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1373.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1374.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1375.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1376.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1377.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1378.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1379.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1380.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1381.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1382.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1383.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1384.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1385.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1386.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1387.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1388.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1389.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1390.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1391.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1392.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1393.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1394.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1395.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1396.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1397.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1398.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1399.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1400.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1401.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1402.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1403.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1404.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1405.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1406.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1407.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1408.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1409.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1410.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1411.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1412.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1413.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1414.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1415.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1416.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1417.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1418.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1419.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1420.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1421.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1422.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1423.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1424.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1425.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1426.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1427.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1428.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1429.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1430.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1431.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1432.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1433.xml.gz \ No newline at end of file From 9646b9fd98b2e575dbe2bf258ccf708ad4c4be45 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 7 Oct 2021 11:29:11 +0200 Subject: [PATCH 262/287] implementation of the http call for the update of openorgs suggestions --- .../dhp/oa/dedup/UpdateOpenorgsJob.java | 115 ++++++++++++++++++ .../oa/dedup/openorgs/oozie_app/workflow.xml | 20 +-- .../dedup/updateOpenorgsJob_parameters.json | 14 +++ 3 files changed, 141 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java new file mode 100644 index 000000000..f23c54e2e --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.oa.dedup; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.commons.io.IOUtils; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.TimeUnit; + +public class UpdateOpenorgsJob { + + private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); + + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); + parser.parseArgument(args); + + final String apiUrl = parser.get("apiUrl"); + final int delay = Integer.parseInt(parser.get("delay")); + + log.info("apiUrl: '{}'", apiUrl); + log.info("delay: '{}'", delay); + + APIResponse res = httpCall(apiUrl); + while(res!=null && res.getStatus().equals(ImportStatus.RUNNING)){ + TimeUnit.MINUTES.sleep(delay); + res = httpCall(apiUrl + "/status"); + } + + if (res==null) { + log.error("Openorgs Update FAILED: No response"); + throw new RuntimeException("Openorgs Update FAILED: No response"); + } + + if (res.getStatus()==null || !res.getStatus().equals(ImportStatus.SUCCESS)) { + log.error("Openorgs Update FAILED: '{}' - '{}'", res.getStatus(), res.getMessage()); + throw new RuntimeException(res.getMessage()); + } + + } + + private static APIResponse httpCall(final String url) throws Exception { + final HttpGet req = new HttpGet(url); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + try (final CloseableHttpResponse response = client.execute(req)) { + final String s = IOUtils.toString(response.getEntity().getContent()); + return (new ObjectMapper()).readValue(s, APIResponse.class); + } + } + } + +} + +class APIResponse { + private String id; + private Long dateStart; + private Long dateEnd; + private ImportStatus status; + private String message; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Long getDateStart() { + return dateStart; + } + + public void setDateStart(Long dateStart) { + this.dateStart = dateStart; + } + + public Long getDateEnd() { + return dateEnd; + } + + public void setDateEnd(Long dateEnd) { + this.dateEnd = dateEnd; + } + + public ImportStatus getStatus() { + return status; + } + + public void setStatus(ImportStatus status) { + this.status = status; + } + + public String getMessage() { + return message; + } + + public void setMessage(String message) { + this.message = message; + } +} + +enum ImportStatus { + SUCCESS, + FAILED, + RUNNING, + NOT_LAUNCHED, + NOT_YET_STARTED +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml index 30442406c..c7c6c9257 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml @@ -28,6 +28,11 @@ dbPwd password to access the OpenOrgs database + + dbConnections + 10 + number of connections to the postgres db + workingPath path for the working directory @@ -223,7 +228,7 @@ --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} - --numConnections20 + --numConnections${dbConnections} @@ -254,19 +259,18 @@ --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} - --numConnections20 + --numConnections${dbConnections} - - ${jobTracker} - ${nameNode} - /usr/bin/curl - ${apiUrl} - + + eu.dnetlib.dhp.oa.dedup.UpdateOpenorgsJob + --apiUrl${apiUrl} + --delay5 + diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json new file mode 100644 index 000000000..5ca4a3dba --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "api", + "paramLongName": "apiUrl", + "paramDescription": "the url of the API", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "delay", + "paramDescription": "delay for the HTTP call in minutes", + "paramRequired": true + } +] \ No newline at end of file From 611ca511db42d8f07e3c21e7526a19d0755a57b3 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 7 Oct 2021 15:39:55 +0200 Subject: [PATCH 263/287] set configuration property in openorgs duplicates wf --- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 201 +-- .../dhp/oa/dedup/UpdateOpenorgsJob.java | 32 +- .../oa/dedup/openorgs/oozie_app/workflow.xml | 6 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 1266 +++++++++-------- 4 files changed, 761 insertions(+), 744 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index fa7d33570..7d91e47cc 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; @@ -5,10 +6,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -21,7 +19,6 @@ import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.dedup.model.Block; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -31,121 +28,127 @@ import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -import scala.Tuple3; public class SparkWhitelistSimRels extends AbstractSparkAction { - private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); + private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); - private static final String WHITELIST_SEPARATOR = "####"; + private static final String WHITELIST_SEPARATOR = "####"; - public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { - super(parser, spark); - } + public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { + super(parser, spark); + } - public static void main(String[] args) throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); - parser.parseArgument(args); + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + parser.parseArgument(args); - SparkConf conf = new SparkConf(); - new SparkWhitelistSimRels(parser, getSparkSession(conf)) - .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); - } + SparkConf conf = new SparkConf(); + new SparkWhitelistSimRels(parser, getSparkSession(conf)) + .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); + } - @Override - public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException, SAXException { + @Override + public void run(ISLookUpService isLookUpService) + throws DocumentException, IOException, ISLookUpException, SAXException { - // read oozie parameters - final String graphBasePath = parser.get("graphBasePath"); - final String isLookUpUrl = parser.get("isLookUpUrl"); - final String actionSetId = parser.get("actionSetId"); - final String workingPath = parser.get("workingPath"); - final int numPartitions = Optional - .ofNullable(parser.get("numPartitions")) - .map(Integer::valueOf) - .orElse(NUM_PARTITIONS); - final String whiteListPath = parser.get("whiteListPath"); + // read oozie parameters + final String graphBasePath = parser.get("graphBasePath"); + final String isLookUpUrl = parser.get("isLookUpUrl"); + final String actionSetId = parser.get("actionSetId"); + final String workingPath = parser.get("workingPath"); + final int numPartitions = Optional + .ofNullable(parser.get("numPartitions")) + .map(Integer::valueOf) + .orElse(NUM_PARTITIONS); + final String whiteListPath = parser.get("whiteListPath"); - log.info("numPartitions: '{}'", numPartitions); - log.info("graphBasePath: '{}'", graphBasePath); - log.info("isLookUpUrl: '{}'", isLookUpUrl); - log.info("actionSetId: '{}'", actionSetId); - log.info("workingPath: '{}'", workingPath); - log.info("whiteListPath: '{}'", whiteListPath); + log.info("numPartitions: '{}'", numPartitions); + log.info("graphBasePath: '{}'", graphBasePath); + log.info("isLookUpUrl: '{}'", isLookUpUrl); + log.info("actionSetId: '{}'", actionSetId); + log.info("workingPath: '{}'", workingPath); + log.info("whiteListPath: '{}'", whiteListPath); - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - //file format: source####target - Dataset> whiteListRels = spark.createDataset(sc - .textFile(whiteListPath) - //check if the line is in the correct format: id1####id2 - .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) - .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + // file format: source####target + Dataset> whiteListRels = spark + .createDataset( + sc + .textFile(whiteListPath) + // check if the line is in the correct format: id1####id2 + .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) + .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - // for each dedup configuration - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { + // for each dedup configuration + for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { - final String entity = dedupConf.getWf().getEntityType(); - final String subEntity = dedupConf.getWf().getSubEntityValue(); - log.info("Adding whitelist simrels for: '{}'", subEntity); + final String entity = dedupConf.getWf().getEntityType(); + final String subEntity = dedupConf.getWf().getSubEntityValue(); + log.info("Adding whitelist simrels for: '{}'", subEntity); - final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); + final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); - Dataset> entities = spark.createDataset(sc - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .repartition(numPartitions) - .mapToPair( - (PairFunction) s -> { - MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); - return new Tuple2<>(d.getIdentifier(), "present"); - }) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> entities = spark + .createDataset( + sc + .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .repartition(numPartitions) + .mapToPair( + (PairFunction) s -> { + MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); + return new Tuple2<>(d.getIdentifier(), "present"); + }) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset> whiteListRels1 = whiteListRels - .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") - .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> whiteListRels1 = whiteListRels + .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset> whiteListRels2 = whiteListRels1 - .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") - .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> whiteListRels2 = whiteListRels1 + .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset whiteListSimRels = whiteListRels2 - .map((MapFunction, Relation>) - r -> createSimRel(r._1(), r._2(), entity), - Encoders.bean(Relation.class) - ); + Dataset whiteListSimRels = whiteListRels2 + .map( + (MapFunction, Relation>) r -> createSimRel(r._1(), r._2(), entity), + Encoders.bean(Relation.class)); - saveParquet(whiteListSimRels, outputPath, SaveMode.Append); - } - } + saveParquet(whiteListSimRels, outputPath, SaveMode.Append); + } + } - private Relation createSimRel(String source, String target, String entity) { - final Relation r = new Relation(); - r.setSource(source); - r.setTarget(target); - r.setSubRelType("dedupSimilarity"); - r.setRelClass("isSimilarTo"); - r.setDataInfo(new DataInfo()); + private Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass("isSimilarTo"); + r.setDataInfo(new DataInfo()); - switch (entity) { - case "result": - r.setRelType("resultResult"); - break; - case "organization": - r.setRelType("organizationOrganization"); - break; - default: - throw new IllegalArgumentException("unmanaged entity type: " + entity); - } - return r; - } + switch (entity) { + case "result": + r.setRelType("resultResult"); + break; + case "organization": + r.setRelType("organizationOrganization"); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java index f23c54e2e..d094fb72b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java @@ -1,7 +1,8 @@ + package eu.dnetlib.dhp.oa.dedup; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import java.util.concurrent.TimeUnit; + import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -10,16 +11,21 @@ import org.apache.http.impl.client.HttpClients; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.concurrent.TimeUnit; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class UpdateOpenorgsJob { - private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); + private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); public static void main(String[] args) throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); - parser.parseArgument(args); + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); + parser.parseArgument(args); final String apiUrl = parser.get("apiUrl"); final int delay = Integer.parseInt(parser.get("delay")); @@ -28,17 +34,17 @@ public class UpdateOpenorgsJob { log.info("delay: '{}'", delay); APIResponse res = httpCall(apiUrl); - while(res!=null && res.getStatus().equals(ImportStatus.RUNNING)){ + while (res != null && res.getStatus().equals(ImportStatus.RUNNING)) { TimeUnit.MINUTES.sleep(delay); res = httpCall(apiUrl + "/status"); } - if (res==null) { + if (res == null) { log.error("Openorgs Update FAILED: No response"); throw new RuntimeException("Openorgs Update FAILED: No response"); } - if (res.getStatus()==null || !res.getStatus().equals(ImportStatus.SUCCESS)) { + if (res.getStatus() == null || !res.getStatus().equals(ImportStatus.SUCCESS)) { log.error("Openorgs Update FAILED: '{}' - '{}'", res.getStatus(), res.getMessage()); throw new RuntimeException(res.getMessage()); } @@ -107,9 +113,5 @@ class APIResponse { } enum ImportStatus { - SUCCESS, - FAILED, - RUNNING, - NOT_LAUNCHED, - NOT_YET_STARTED + SUCCESS, FAILED, RUNNING, NOT_LAUNCHED, NOT_YET_STARTED } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml index c7c6c9257..6947019e8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml @@ -267,6 +267,12 @@ + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.oa.dedup.UpdateOpenorgsJob --apiUrl${apiUrl} --delay5 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index fa03f93a6..549988767 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -48,634 +48,640 @@ import scala.Tuple2; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class SparkDedupTest implements Serializable { - @Mock(serializable = true) - ISLookUpService isLookUpService; - - private static SparkSession spark; - private static JavaSparkContext jsc; - - private static String testGraphBasePath; - private static String testOutputBasePath; - private static String testDedupGraphBasePath; - private static final String testActionSetId = "test-orchestrator"; - private static String whitelistPath; - private static List whiteList; - - private static String WHITELIST_SEPARATOR = "####"; - - @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { - - testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) - .toFile() - .getAbsolutePath(); - testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - whitelistPath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) - .toFile() - .getAbsolutePath(); - whiteList = IOUtils.readLines(new FileReader(whitelistPath)); - - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - - final SparkConf conf = new SparkConf(); - conf.set("spark.sql.shuffle.partitions", "200"); - spark = SparkSession - .builder() - .appName(SparkDedupTest.class.getSimpleName()) - .master("local[*]") - .config(conf) - .getOrCreate(); - - jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - } - - @BeforeEach - public void setUp() throws IOException, ISLookUpException { - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); - } - - @Test - @Order(1) - void createSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50" - }); - - new SparkCreateSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(336, sw_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - } - - @Test - @Order(2) - void whitelistSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkWhitelistSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50", - "-wl", whitelistPath - }); - - new SparkWhitelistSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - //entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - - //entities simrels to be different from the number of previous step (new simrels in the whitelist) - Dataset sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); - - //check if the first relation in the whitelist exists - assertTrue(sw_simrel - .as(Encoders.bean(Relation.class)) - .toJavaRDD() - .filter(rel -> - rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])).count() > 0); - //check if the second relation in the whitelist exists - assertTrue(sw_simrel - .as(Encoders.bean(Relation.class)) - .toJavaRDD() - .filter(rel -> - rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])).count() > 0); - - assertEquals(338, sw_simrel.count()); - - } - - @Test - @Order(3) - void cutMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath, - "-cc", - "3" - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - assertEquals(0, orgs_mergerel); - assertEquals(0, pubs_mergerel); - assertEquals(0, sw_mergerel); - assertEquals(0, ds_mergerel); - assertEquals(0, orp_mergerel); - - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); - FileUtils - .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); - } - - @Test - @Order(4) - void createMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .count(); - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .count(); - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .count(); - - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); - - } - - @Test - @Order(5) - void createDedupRecordTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateDedupRecord.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateDedupRecord(parser, spark).run(isLookUpService); - - long orgs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") - .count(); - long pubs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .count(); - long sw_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") - .count(); - long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); - long orp_deduprecord = jsc - .textFile( - testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") - .count(); - - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); - assertEquals(49, sw_deduprecord); - assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); - } - - @Test - @Order(6) - void updateEntityTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkUpdateEntity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkUpdateEntity(parser, spark).run(isLookUpService); - - long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); - long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); - long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); - long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); - long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); - long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); - long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); - - long mergedOrgs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedPubs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedSw = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedDs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedOrp = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - assertEquals(896, publications); - assertEquals(838, organizations); - assertEquals(100, projects); - assertEquals(100, datasource); - assertEquals(198, softwares); - assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); - - long deletedOrgs = jsc - .textFile(testDedupGraphBasePath + "/organization") - .filter(this::isDeletedByInference) - .count(); - - long deletedPubs = jsc - .textFile(testDedupGraphBasePath + "/publication") - .filter(this::isDeletedByInference) - .count(); - - long deletedSw = jsc - .textFile(testDedupGraphBasePath + "/software") - .filter(this::isDeletedByInference) - .count(); - - long deletedDs = jsc - .textFile(testDedupGraphBasePath + "/dataset") - .filter(this::isDeletedByInference) - .count(); - - long deletedOrp = jsc - .textFile(testDedupGraphBasePath + "/otherresearchproduct") - .filter(this::isDeletedByInference) - .count(); - - assertEquals(mergedOrgs, deletedOrgs); - assertEquals(mergedPubs, deletedPubs); - assertEquals(mergedSw, deletedSw); - assertEquals(mergedDs, deletedDs); - assertEquals(mergedOrp, deletedOrp); - } - - @Test - @Order(7) - void propagateRelationTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkPropagateRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkPropagateRelation(parser, spark).run(isLookUpService); - - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - - assertEquals(4860, relations); - - // check deletedbyinference - final Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) - .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels - .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); - - JavaRDD toCheck = jsc - .textFile(testDedupGraphBasePath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); - - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); - - assertEquals(updated, deletedbyinference); - } - - @Test - @Order(8) - void testRelations() throws Exception { - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); - } - - private void testUniqueness(String path, int expected_total, int expected_unique) { - Dataset rel = spark - .read() - .textFile(getClass().getResource(path).getPath()) - .map( - (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), - Encoders.bean(Relation.class)); - - assertEquals(expected_total, rel.count()); - assertEquals(expected_unique, rel.distinct().count()); - } - - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - return s.contains("\"deletedbyinference\":true"); - } + @Mock(serializable = true) + ISLookUpService isLookUpService; + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; + private static String whitelistPath; + private static List whiteList; + + private static String WHITELIST_SEPARATOR = "####"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + whitelistPath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) + .toFile() + .getAbsolutePath(); + whiteList = IOUtils.readLines(new FileReader(whitelistPath)); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "200"); + spark = SparkSession + .builder() + .appName(SparkDedupTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); + } + + @Test + @Order(1) + void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(336, sw_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + } + + @Test + @Order(2) + void whitelistSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkWhitelistSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50", + "-wl", whitelistPath + }); + + new SparkWhitelistSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + + // entities simrels to be different from the number of previous step (new simrels in the whitelist) + Dataset sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); + + // check if the first relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + // check if the second relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + + assertEquals(338, sw_simrel.count()); + + } + + @Test + @Order(3) + void cutMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-cc", + "3" + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + assertEquals(0, orgs_mergerel); + assertEquals(0, pubs_mergerel); + assertEquals(0, sw_mergerel); + assertEquals(0, ds_mergerel); + assertEquals(0, orp_mergerel); + + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); + FileUtils + .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); + } + + @Test + @Order(4) + void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + assertEquals(1272, orgs_mergerel); + assertEquals(1438, pubs_mergerel); + assertEquals(286, sw_mergerel); + assertEquals(472, ds_mergerel); + assertEquals(718, orp_mergerel); + + } + + @Test + @Order(5) + void createDedupRecordTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateDedupRecord.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateDedupRecord(parser, spark).run(isLookUpService); + + long orgs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") + .count(); + long pubs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .count(); + long sw_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") + .count(); + long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); + long orp_deduprecord = jsc + .textFile( + testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") + .count(); + + assertEquals(85, orgs_deduprecord); + assertEquals(65, pubs_deduprecord); + assertEquals(49, sw_deduprecord); + assertEquals(97, ds_deduprecord); + assertEquals(89, orp_deduprecord); + } + + @Test + @Order(6) + void updateEntityTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateEntity.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkUpdateEntity(parser, spark).run(isLookUpService); + + long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); + long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); + long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); + long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); + long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); + long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); + + long mergedOrgs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedPubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedSw = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedDs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedOrp = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + assertEquals(896, publications); + assertEquals(838, organizations); + assertEquals(100, projects); + assertEquals(100, datasource); + assertEquals(198, softwares); + assertEquals(389, dataset); + assertEquals(517, otherresearchproduct); + + long deletedOrgs = jsc + .textFile(testDedupGraphBasePath + "/organization") + .filter(this::isDeletedByInference) + .count(); + + long deletedPubs = jsc + .textFile(testDedupGraphBasePath + "/publication") + .filter(this::isDeletedByInference) + .count(); + + long deletedSw = jsc + .textFile(testDedupGraphBasePath + "/software") + .filter(this::isDeletedByInference) + .count(); + + long deletedDs = jsc + .textFile(testDedupGraphBasePath + "/dataset") + .filter(this::isDeletedByInference) + .count(); + + long deletedOrp = jsc + .textFile(testDedupGraphBasePath + "/otherresearchproduct") + .filter(this::isDeletedByInference) + .count(); + + assertEquals(mergedOrgs, deletedOrgs); + assertEquals(mergedPubs, deletedPubs); + assertEquals(mergedSw, deletedSw); + assertEquals(mergedDs, deletedDs); + assertEquals(mergedOrp, deletedOrp); + } + + @Test + @Order(7) + void propagateRelationTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkPropagateRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkPropagateRelation(parser, spark).run(isLookUpService); + + long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + + assertEquals(4860, relations); + + // check deletedbyinference + final Dataset mergeRels = spark + .read() + .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) + .as(Encoders.bean(Relation.class)); + final JavaPairRDD mergedIds = mergeRels + .where("relClass == 'merges'") + .select(mergeRels.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2(r.getString(0), "d")); + + JavaRDD toCheck = jsc + .textFile(testDedupGraphBasePath + "/relation") + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()) + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()); + + long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); + long updated = toCheck.count(); + + assertEquals(updated, deletedbyinference); + } + + @Test + @Order(8) + void testRelations() throws Exception { + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); + } + + private void testUniqueness(String path, int expected_total, int expected_unique) { + Dataset rel = spark + .read() + .textFile(getClass().getResource(path).getPath()) + .map( + (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), + Encoders.bean(Relation.class)); + + assertEquals(expected_total, rel.count()); + assertEquals(expected_unique, rel.distinct().count()); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + + public boolean isDeletedByInference(String s) { + return s.contains("\"deletedbyinference\":true"); + } } From 8d3b60f4469f553acfad14e618414c930daebe05 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 7 Oct 2021 17:30:45 +0200 Subject: [PATCH 264/287] test for patching records for EOSC Future --- .../provision/IndexRecordTransformerTest.java | 20 ++ .../eosc-future/air-quality-copernicus.xml | 114 +++++++ .../eosc-future/b2share-plot-related-orp.xml | 288 ++++++++++++++++++ .../provision/eosc-future/b2share-plot-sw.xml | 112 +++++++ .../eosc-future/data-transfer-pilot.xml | 23 +- .../training-notebooks-seadatanet.xml | 11 +- 6 files changed, 551 insertions(+), 17 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 8daf318be..1c7dce3f2 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -88,6 +88,26 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testForEOSCFutureAirQualityCopernicus() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureB2SharePlotSw() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-sw.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureB2SharePlotRelatedORP() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); + testRecordTransformation(record); + } + + + private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml new file mode 100644 index 000000000..43b256bbb --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml @@ -0,0 +1,114 @@ + + +
+ r37b0ad08687::a8df7db30ae0e4e0b875a098df7b652f + 2021-10-07T01:56:56Z + under curation + +
+ + + + + Using CAMS European air quality analysis from Copernicus + Atmosphere Monitoring with RELIANCE services + + Simone Mantovani + 2021-10-07 + + + + This notebook shows how to discover and access the Copernicus Atmosphere Monitoring products available in the RELIANCE datacube resources. + The process is structured in 6 steps, including example of data analysis and visualization with the Python libraries installed in the Jupyter environment + + + EOSC Jupyter Notebook + + RELIANCE + + Copernicus + + Air quality + + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:5554786 + + oai:zenodo.org:5554786 + + 10.5281/zenodo.5554786 + + + + false + false + 0.9 + + + + + + + corda__h2020::8771f523c34e38902d4921037d545ef8 + + REsearch LIfecycle mAnagemeNt for Earth Science Communities and CopErnicus users in EOSC + 101017501 + RELIANCE + + + ec__________::EC::H2020 + ec__________::EC::H2020::RIA + + + + + + + + + + + + https://zenodo.org/record/5554786 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml new file mode 100644 index 000000000..3c2c6440f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml @@ -0,0 +1,288 @@ + + +
+ doi_dedup___::44fd8a9b5b79adb0783ac245b21e3127 + 2019-09-19T07:43:31+0000 + 2019-09-19T07:43:31+0000 +
+ + + + + + + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + 6a93c069-a167-44cb-bfe8-74c275637347 + 50|r3730f562f9e::9b434fedc00d568b8e00611a7fa19f41 + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + ada23067-496a-494f-bd82-6ffe3cf4f0fb + 50|r3730f562f9e::b9cd774e8126b6902d56f9a4aa03e1dc + f3bd1041-422c-439d-8e68-c1d0711d130d + 50|r3730f562f9e::b847821a0ca5365b0d971dd89dea6bf1 + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + HCG16 L-band VLA C+D array final data + + + Jones, Michael G. + 2019-01-01 + These are the reduced final data associated with the paper Jones et al. 2019 submitted + to Astronomy & Astrophysics. They are used by a mybinder (https://gke.mybinder.org/) + executable environment to generate the final plots of that paper. The link for this environment + is https://mybinder.org/v2/gh/AMIGA-IAA/hcg-16/master. The raw VLA D and C array data of HCG 16 + were collected by the Very Large Array (http://www.vla.nrao.edu/) in 1989 and 1999, under PI + projects of Barbara Williams. The project numbers are AW234 and AW500 respectively. The file + also includes a grz colour image and r-band image from DECaLS DR8 + (http://legacysurvey.org/decamls/), a GBT HI spectrum published in Borthakur et al. 2010 (ApJ + 710, 385), an HI data cube from HIPASS (https://www.atnf.csiro.au/research/multibeam/release/), + and a source mask (and associated parameters file) for the HIPASS cube generated using SoFiA + (https://github.com/SoFiA-Admin/SoFiA-2). + + 3.5.2.1.1 → Observational astronomy → + Radio astronomy + + HI + + VLA + + HCG16 + + Various + + + 2019-01-01 + + https://b2share.eudat.eu + + + + true + false + 0.8 + dedup-similarity-result-decisiontree-v2 + + + + + userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + + Plot scripts for HCG-16 Project + + + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + 2019-01-01 + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + HCG16 L-band VLA C+D array final data + + + https://b2share.eudat.eu + + + 2019-01-01 + HCG16 L-band VLA C+D array final data + + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + https://b2share.eudat.eu + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + 2019-01-01 + HCG16 L-band VLA C+D array final data + + + https://b2share.eudat.eu + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + + + + 2019-01-01 + + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + https://dx.doi.org/10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + + + 2019-01-01 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + https://dx.doi.org/10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + + + + 2019-01-01 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + https://dx.doi.org/10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml new file mode 100644 index 000000000..5f44f6b1f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml @@ -0,0 +1,112 @@ + + +
+ userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + 2021-10-07T12:42:54Z + +
+ + + + + Plot scripts for HCG-16 Project + + Jones, Michael G. + Jones, Michael G. + 2021-09-30 + + + These are the notebooks to general the final data plots of the paper Jones et al. 2019 + submitted to Astronomy & Astrophysics. They can be used in a notebooks environment (like + https://notebooks.egi.eu/) with the proper libraries installed. A mybinder + (https://mybinder.org/) + ready version can be started from https://mybinder.org/v2/gh/AMIGA-IAA/hcg-16/master. Data to + generate plots is also available from B2SHARE: + https://b2share.eudat.eu/records/a69a7b2dcc22449e8734552dde4d3906 + + + EOSC Jupyter Notebook + + + B2SHARE + + + + + + + + + + + + + + + + + + + userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + + 10.23728/b2share.adf6e2e942b04561a8640c449b48c14a + + + + false + false + 0.9 + + + + + + doi_dedup___::44fd8a9b5b79adb0783ac245b21e3127 + HCG16 L-band VLA C+D array final data + 2019-01-01 + https://b2share.eudat.eu + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + + + + + + 2021-09-30 + + http://dx.doi.org/10.23728/b2share.adf6e2e942b04561a8640c449b48c14a + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml index 23dd6c6ed..6d2ac7630 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml @@ -1,26 +1,25 @@
- r37b0ad08687::dec0d8520e726f2adda9a51280ac7299 - 2021-09-22T08:53:16Z - under curation - + doi_dedup___::ab57f086011a9ae23d1165211dc6e04b + 2020-11-03T05:39:50+0000 + 2020-11-03T05:39:50+0000
EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml - Giuseppe La Rocca - Enol Fernández - Andrea Manzi - + Giuseppe La Rocca + Enol Fernández + Andrea Manzi + 2020-11-03 This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement. EOSC Jupyter Notebook - + 2020-11-03 Zenodo @@ -43,8 +42,8 @@ oai:zenodo.org:4218562 oai:zenodo.org:4218562 - 10.5281/zenodo.4218562 - + 10.5281/zenodo.4195418 + 10.5281/zenodo.4218562 false false @@ -59,7 +58,7 @@ - + 2020-11-03 https://zenodo.org/record/4218562 diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml index 9995b902f..9ab9b9861 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml @@ -1,10 +1,8 @@
- r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb + doi_dedup___::8539a8de8996e01350f0de8ca4899b7f 2021-09-22T08:53:13Z - under curation -
EGI-Foundation/training-notebooks-seadatanet: Version 0.4 Enol Fernández - + 2019-12-04 - A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/qVk6JWQ (run at EGI Notebooks service for easy access to data).This release updates the PID for the data. + A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/3Byz9Cw (run at EGI Notebooks service for easy access to data). This release uses the correct path of the data share from the EGI DataHub. EOSC Jupyter Notebook @@ -43,6 +41,9 @@ oai:zenodo.org:3561323 10.5281/zenodo.3561323 + 10.5281/zenodo.3443996 + 10.5281/zenodo.3475539 + 10.5281/zenodo.3475785 false From 8f99d2af86d96194b5822b0a71991d568d034369 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 28 Sep 2021 11:31:19 +0200 Subject: [PATCH 265/287] Make the node of doiBoost to point to the correct OpenAire Organization in relations --- .../main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index e501b4823..501073e74 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -208,7 +208,7 @@ object SparkGenerateDoiBoost { (r.getTarget,r) else ("resolved", r) - }) + })(Encoders.tuple(Encoders.STRING, mapEncoderRel)) val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) @@ -222,7 +222,7 @@ object SparkGenerateDoiBoost { else currentRels.setTarget(currentOrgs._1) currentRels - }.write.save(s"$workingDirPath/doiBoostPublicationAffiliation") + }.filter(r=> !r.getSource.startsWith("unresolved") && !r.getTarget.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { val affiliation = item._2 From de810073024185a9da4cb7e97eb0dd4c38510093 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Fri, 8 Oct 2021 16:54:56 +0300 Subject: [PATCH 266/287] Add exploreTestConfig, a new Solr configuration folder --- .../solr/conf/exploreTestConfig/elevate.xml | 31 + .../lang/contractions_ca.txt | 8 + .../lang/contractions_fr.txt | 15 + .../lang/contractions_ga.txt | 5 + .../lang/contractions_it.txt | 23 + .../lang/hyphenations_ga.txt | 5 + .../exploreTestConfig/lang/stemdict_nl.txt | 6 + .../exploreTestConfig/lang/stoptags_ja.txt | 420 +++++ .../exploreTestConfig/lang/stopwords_ar.txt | 125 ++ .../exploreTestConfig/lang/stopwords_bg.txt | 193 +++ .../exploreTestConfig/lang/stopwords_ca.txt | 220 +++ .../exploreTestConfig/lang/stopwords_cz.txt | 172 ++ .../exploreTestConfig/lang/stopwords_da.txt | 110 ++ .../exploreTestConfig/lang/stopwords_de.txt | 294 ++++ .../exploreTestConfig/lang/stopwords_el.txt | 78 + .../exploreTestConfig/lang/stopwords_en.txt | 54 + .../exploreTestConfig/lang/stopwords_es.txt | 356 ++++ .../exploreTestConfig/lang/stopwords_eu.txt | 99 ++ .../exploreTestConfig/lang/stopwords_fa.txt | 313 ++++ .../exploreTestConfig/lang/stopwords_fi.txt | 97 ++ .../exploreTestConfig/lang/stopwords_fr.txt | 186 ++ .../exploreTestConfig/lang/stopwords_ga.txt | 110 ++ .../exploreTestConfig/lang/stopwords_gl.txt | 161 ++ .../exploreTestConfig/lang/stopwords_hi.txt | 235 +++ .../exploreTestConfig/lang/stopwords_hu.txt | 211 +++ .../exploreTestConfig/lang/stopwords_hy.txt | 46 + .../exploreTestConfig/lang/stopwords_id.txt | 359 ++++ .../exploreTestConfig/lang/stopwords_it.txt | 303 ++++ .../exploreTestConfig/lang/stopwords_ja.txt | 127 ++ .../exploreTestConfig/lang/stopwords_lv.txt | 172 ++ .../exploreTestConfig/lang/stopwords_nl.txt | 119 ++ .../exploreTestConfig/lang/stopwords_no.txt | 194 +++ .../exploreTestConfig/lang/stopwords_pt.txt | 253 +++ .../exploreTestConfig/lang/stopwords_ro.txt | 233 +++ .../exploreTestConfig/lang/stopwords_ru.txt | 243 +++ .../exploreTestConfig/lang/stopwords_sv.txt | 133 ++ .../exploreTestConfig/lang/stopwords_th.txt | 119 ++ .../exploreTestConfig/lang/stopwords_tr.txt | 212 +++ .../exploreTestConfig/lang/userdict_ja.txt | 29 + .../conf/exploreTestConfig/managed-schema | 411 +++++ .../solr/conf/exploreTestConfig/params.json | 20 + .../solr/conf/exploreTestConfig/protwords.txt | 21 + .../conf/exploreTestConfig/solrconfig.xml | 1506 +++++++++++++++++ .../solr/conf/exploreTestConfig/stopwords.txt | 14 + .../solr/conf/exploreTestConfig/synonyms.txt | 29 + 45 files changed, 8070 insertions(+) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/elevate.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ca.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_fr.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ga.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_it.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/hyphenations_ga.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stemdict_nl.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stoptags_ja.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ar.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_bg.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ca.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_cz.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_da.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_de.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_el.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_en.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_es.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_eu.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fa.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fi.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fr.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ga.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_gl.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hi.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hu.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hy.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_id.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_it.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ja.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_lv.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_nl.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_no.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_pt.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ro.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ru.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_sv.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_th.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_tr.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/userdict_ja.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/params.json create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/protwords.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/solrconfig.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/stopwords.txt create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/synonyms.txt diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/elevate.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/elevate.xml new file mode 100644 index 000000000..668332b28 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/elevate.xml @@ -0,0 +1,31 @@ +Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ca.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ca.txt new file mode 100644 index 000000000..307a85f91 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_fr.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_fr.txt new file mode 100644 index 000000000..f1bba51b2 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_fr.txt @@ -0,0 +1,15 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j +d +c +jusqu +quoiqu +lorsqu +puisqu diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ga.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ga.txt new file mode 100644 index 000000000..9ebe7fa34 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_it.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_it.txt new file mode 100644 index 000000000..cac040953 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/hyphenations_ga.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/hyphenations_ga.txt new file mode 100644 index 000000000..4d2642cc5 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stemdict_nl.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stemdict_nl.txt new file mode 100644 index 000000000..441072971 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stoptags_ja.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stoptags_ja.txt new file mode 100644 index 000000000..71b750845 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ar.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ar.txt new file mode 100644 index 000000000..046829db6 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_bg.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_bg.txt new file mode 100644 index 000000000..1ae4ba2ae --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ca.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ca.txt new file mode 100644 index 000000000..3da65deaf --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_cz.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_cz.txt new file mode 100644 index 000000000..53c6097da --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_da.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_da.txt new file mode 100644 index 000000000..42e6145b9 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_da.txt @@ -0,0 +1,110 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. +vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_de.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_de.txt new file mode 100644 index 000000000..86525e7ae --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_de.txt @@ -0,0 +1,294 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. + + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_el.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_el.txt new file mode 100644 index 000000000..232681f5b --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_en.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_en.txt new file mode 100644 index 000000000..2c164c0b2 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_es.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_es.txt new file mode 100644 index 000000000..487d78c8d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_es.txt @@ -0,0 +1,356 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? +unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería +serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_eu.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_eu.txt new file mode 100644 index 000000000..25f1db934 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fa.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fa.txt new file mode 100644 index 000000000..723641c6d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fi.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fi.txt new file mode 100644 index 000000000..4372c9a05 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fi.txt @@ -0,0 +1,97 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fr.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fr.txt new file mode 100644 index 000000000..749abae68 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_fr.txt @@ -0,0 +1,186 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ga.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ga.txt new file mode 100644 index 000000000..9ff88d747 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_gl.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_gl.txt new file mode 100644 index 000000000..d8760b12c --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hi.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hi.txt new file mode 100644 index 000000000..86286bb08 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hu.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hu.txt new file mode 100644 index 000000000..37526da8a --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hu.txt @@ -0,0 +1,211 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hy.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hy.txt new file mode 100644 index 000000000..60c1c50fb --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. +այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_id.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_id.txt new file mode 100644 index 000000000..4617f83a5 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_it.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_it.txt new file mode 100644 index 000000000..1219cc773 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_it.txt @@ -0,0 +1,303 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ja.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ja.txt new file mode 100644 index 000000000..d4321be6b --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. +# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_lv.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_lv.txt new file mode 100644 index 000000000..e21a23c06 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_nl.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_nl.txt new file mode 100644 index 000000000..47a2aeacf --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_nl.txt @@ -0,0 +1,119 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_no.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_no.txt new file mode 100644 index 000000000..a7a2c28ba --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_no.txt @@ -0,0 +1,194 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) * +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_pt.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_pt.txt new file mode 100644 index 000000000..acfeb01af --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_pt.txt @@ -0,0 +1,253 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. + +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ro.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ro.txt new file mode 100644 index 000000000..4fdee90a5 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ru.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ru.txt new file mode 100644 index 000000000..55271400c --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_ru.txt @@ -0,0 +1,243 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_sv.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_sv.txt new file mode 100644 index 000000000..096f87f67 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_sv.txt @@ -0,0 +1,133 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_th.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_th.txt new file mode 100644 index 000000000..07f0fabe6 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_tr.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_tr.txt new file mode 100644 index 000000000..84d9408d4 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/userdict_ja.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/userdict_ja.txt new file mode 100644 index 000000000..6f0368e4d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema new file mode 100644 index 000000000..bcf4e189d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema @@ -0,0 +1,411 @@ + + + + __indexrecordidentifier + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/params.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/params.json new file mode 100644 index 000000000..06114ef25 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/params.json @@ -0,0 +1,20 @@ +{"params":{ + "query":{ + "defType":"edismax", + "q.alt":"*:*", + "rows":"10", + "fl":"*,score", + "":{"v":0} + }, + "facets":{ + "facet":"on", + "facet.mincount": "1", + "":{"v":0} + }, + "velocity":{ + "wt": "velocity", + "v.template":"browse", + "v.layout": "layout", + "":{"v":0} + } +}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/protwords.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/protwords.txt new file mode 100644 index 000000000..1dfc0abec --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/solrconfig.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/solrconfig.xml new file mode 100644 index 000000000..de38e7307 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/solrconfig.xml @@ -0,0 +1,1506 @@ + + + + + + + + + 7.5.0 + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + explicit + AND + 10 + + + + + + + + + + + + + + + + explicit + AND + 10 + + + edismax + + + 1.0 + + + + resultidentifier^100 + resultauthor^34 + resultacceptanceyear^21 + resultsubject^13 + resulttitle^8 + relprojectname^5 + resultdescription^3 + __all + + + + + + + + + + + + + + + + + + + + + + + + + + + explicit + json + true + + + + + + + + explicit + + + + + + __all + + + + + + + + + true + ignored_ + __all + + + + + + + + text_general + + + + + + default + __all + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + + + + + + + default + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + true + + + tvComponent + + + + + + + + + + + + true + false + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + [^\w-\.] + _ + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd + + + + strings + + java.lang.Boolean + booleans + + + java.util.Date + tdates + + + java.lang.Long + java.lang.Integer + tlongs + + + java.lang.Number + tdoubles + + + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + ${velocity.template.base.dir:} + ${velocity.solr.resource.loader.enabled:true} + ${velocity.params.resource.loader.enabled:false} + + + + + 5 + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/stopwords.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/stopwords.txt new file mode 100644 index 000000000..ae1e83eeb --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/synonyms.txt b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/synonyms.txt new file mode 100644 index 000000000..eab4ee875 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + From e468a7b96b99fe9ac717a9038f5bc222c5f557e4 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Fri, 8 Oct 2021 16:58:51 +0300 Subject: [PATCH 267/287] Add tests to query Solr with different configurations --- .../oa/provision/SolrConfigExploreTest.java | 126 +++++++++++++++++ .../dhp/oa/provision/SolrConfigTest.java | 131 ++++++++++++++++++ .../dhp/oa/provision/SolrExploreTest.java | 109 +++++++++++++++ 3 files changed, 366 insertions(+) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java create mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java create mode 100644 dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java new file mode 100644 index 000000000..9bc2924c3 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java @@ -0,0 +1,126 @@ + +package eu.dnetlib.dhp.oa.provision; + +import java.io.IOException; +import java.net.URI; + +import org.apache.commons.io.IOUtils; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.params.CommonParams; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; +import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +@ExtendWith(MockitoExtension.class) +public class SolrConfigExploreTest extends SolrExploreTest { + + protected static SparkSession spark; + + private static final Integer batchSize = 100; + + @Mock + private ISLookUpService isLookUpService; + + @Mock + private ISLookupClient isLookupClient; + + @BeforeEach + public void prepareMocks() throws ISLookUpException, IOException { + isLookupClient.setIsLookup(isLookUpService); + + int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); + + Mockito + .when(isLookupClient.getDsId(Mockito.anyString())) + .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); + Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); + Mockito + .when(isLookupClient.getLayoutSource(Mockito.anyString())) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); + Mockito + .when(isLookupClient.getLayoutTransformer()) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); + } + + @BeforeAll + public static void before() { + + SparkConf conf = new SparkConf(); + conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); + conf.registerKryoClasses(new Class[] { + SerializableSolrInputDocument.class + }); + + conf.setMaster("local[1]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); + + spark = SparkSession + .builder() + .appName(XmlIndexingJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + + } + + @AfterAll + public static void tearDown() { + spark.stop(); + } + + @Test + public void testSolrConfig() throws Exception { + + String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; + + new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null).run(isLookupClient); + Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + + String[] queryStrings = { + "cancer", + "graph", + "graphs" + }; + + for (String q : queryStrings) { + SolrQuery query = new SolrQuery(); + query.setRequestHandler("/exploreSearch"); + query.add(CommonParams.Q, q); + query.set("debugQuery", "on"); + + log.info("Submit query to Solr with params: {}", query.toString()); + QueryResponse rsp = miniCluster.getSolrClient().query(query); +// System.out.println(rsp.getHighlighting()); +// System.out.println(rsp.getExplainMap()); + + for (SolrDocument doc : rsp.getResults()) { + System.out.println( + doc.get("score") + "\t" + + doc.get("__indexrecordidentifier") + "\t" + + doc.get("resultidentifier") + "\t" + + doc.get("resultauthor") + "\t" + + doc.get("resultacceptanceyear") + "\t" + + doc.get("resultsubject") + "\t" + + doc.get("resulttitle") + "\t" + + doc.get("relprojectname") + "\t" + + doc.get("resultdescription") + "\t" + + doc.get("__all") + "\t" + ); + } + } + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java new file mode 100644 index 000000000..e20ecf152 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java @@ -0,0 +1,131 @@ + +package eu.dnetlib.dhp.oa.provision; + +import java.io.IOException; +import java.io.StringReader; +import java.net.URI; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.params.CommonParams; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.dom4j.io.SAXReader; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; +import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +@ExtendWith(MockitoExtension.class) +public class SolrConfigTest extends SolrTest { + + protected static SparkSession spark; + + private static final Integer batchSize = 100; + + @Mock + private ISLookUpService isLookUpService; + + @Mock + private ISLookupClient isLookupClient; + + @BeforeEach + public void prepareMocks() throws ISLookUpException, IOException { + isLookupClient.setIsLookup(isLookUpService); + + int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); + + Mockito + .when(isLookupClient.getDsId(Mockito.anyString())) + .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); + Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); + Mockito + .when(isLookupClient.getLayoutSource(Mockito.anyString())) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); + Mockito + .when(isLookupClient.getLayoutTransformer()) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); + } + + @BeforeAll + public static void before() { + + SparkConf conf = new SparkConf(); + conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); + conf.registerKryoClasses(new Class[] { + SerializableSolrInputDocument.class + }); + + conf.setMaster("local[1]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); + + spark = SparkSession + .builder() + .appName(XmlIndexingJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + + } + + @AfterAll + public static void tearDown() { + spark.stop(); + } + + @Test + public void testSolrConfig() throws Exception { + + String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; + + new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null).run(isLookupClient); + Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + + String[] queryStrings = { + "cancer", + "graph", + "graphs" + }; + + for (String q : queryStrings) { + SolrQuery query = new SolrQuery(); + query.add(CommonParams.Q, q); + + log.info("Submit query to Solr with params: {}", query.toString()); + QueryResponse rsp = miniCluster.getSolrClient().query(query); + + for (SolrDocument doc : rsp.getResults()) { + System.out.println( + doc.get("score") + "\t" + + doc.get("__indexrecordidentifier") + "\t" + + doc.get("resultidentifier") + "\t" + + doc.get("resultauthor") + "\t" + + doc.get("resultacceptanceyear") + "\t" + + doc.get("resultsubject") + "\t" + + doc.get("resulttitle") + "\t" + + doc.get("relprojectname") + "\t" + + doc.get("resultdescription") + "\t" + + doc.get("__all") + "\t" + ); + } + } + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java new file mode 100644 index 000000000..b86fd8ac8 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java @@ -0,0 +1,109 @@ + +package eu.dnetlib.dhp.oa.provision; + +import java.io.File; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.client.solrj.embedded.JettyConfig; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.ConfigSetAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.cloud.MiniSolrCloudCluster; +import org.apache.solr.common.params.CollectionParams; +import org.apache.solr.common.params.CoreAdminParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class SolrExploreTest { + + protected static final Logger log = LoggerFactory.getLogger(SolrTest.class); + + protected static final String FORMAT = "test"; + protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire"; + protected static final String CONFIG_NAME = "testConfig"; + + protected static MiniSolrCloudCluster miniCluster; + + @TempDir + public static Path workingDir; + + @BeforeAll + public static void setup() throws Exception { + + // random unassigned HTTP port + final int jettyPort = 0; + final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); + + log.info(String.format("working directory: %s", workingDir.toString())); + System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); + + // create a MiniSolrCloudCluster instance + miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); + + // Upload Solr configuration directory to ZooKeeper + String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; + File configDir = new File(solrZKConfigDir); + + miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); + + // override settings in the solrconfig include + System.setProperty("solr.tests.maxBufferedDocs", "100000"); + System.setProperty("solr.tests.maxIndexingThreads", "-1"); + System.setProperty("solr.tests.ramBufferSizeMB", "100"); + + // use non-test classes so RandomizedRunner isn't necessary + System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); + System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); + System.setProperty("solr.lock.type", "single"); + + log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); + + NamedList res = createCollection( + miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); + + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); + + } + + @AfterAll + public static void shutDown() throws Exception { + miniCluster.shutdown(); + FileUtils.deleteDirectory(workingDir.toFile()); + } + + protected static NamedList createCollection(CloudSolrClient client, String name, int numShards, + int replicationFactor, int maxShardsPerNode, String configName) throws Exception { + ModifiableSolrParams modParams = new ModifiableSolrParams(); + modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name()); + modParams.set("name", name); + modParams.set("numShards", numShards); + modParams.set("replicationFactor", replicationFactor); + modParams.set("collection.configName", configName); + modParams.set("maxShardsPerNode", maxShardsPerNode); + QueryRequest request = new QueryRequest(modParams); + request.setPath("/admin/collections"); + return client.request(request); + } + +} From 201ce71cc184c87e1b6df23fd2cc5430cb4e4309 Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Mon, 11 Oct 2021 13:16:39 +0300 Subject: [PATCH 268/287] Add resultsubject, relprojectname and resultacceptanceyear to __all field --- .../conf/exploreTestConfig/managed-schema | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema index bcf4e189d..39c811f83 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema @@ -369,15 +369,15 @@ - + + + - - - - + + @@ -390,22 +390,35 @@ + + + + + + + + + + + + + + + + + + + + - - - - - - - - + \ No newline at end of file From 5606014b178bc6fd72b1121652de816b5be717ef Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 12 Oct 2021 08:11:53 +0200 Subject: [PATCH 269/287] code refactor see ticket #7065 --- .../eu/dnetllib/dhp/sx}/bio/BioDBToOAF.scala | 77 +++++++++--------- .../bio/SparkTransformBioDatabaseToOAF.scala | 15 ++-- .../ebi/SparkCreateBaselineDataFrame.scala | 62 +++++++------- .../sx/bio}/ebi/SparkDownloadEBILinks.scala | 39 ++++----- .../dhp/sx/bio}/ebi/SparkEBILinksToOaf.scala | 21 +++-- .../dhp/sx}/bio/pubmed/PMArticle.java | 2 +- .../dnetllib/dhp/sx}/bio/pubmed/PMAuthor.java | 2 +- .../dnetllib/dhp/sx}/bio/pubmed/PMGrant.java | 2 +- .../dhp/sx}/bio/pubmed/PMJournal.java | 2 +- .../dhp/sx}/bio/pubmed/PMParser.scala | 2 +- .../dhp/sx}/bio/pubmed/PMSubject.java | 2 +- .../dhp/sx}/bio/pubmed/PubMedToOaf.scala | 23 +++--- .../sx/bio}/ebi/baseline_to_oaf_params.json | 0 .../dhp/sx/bio}/ebi/ebi_download_update.json | 0 .../dhp/sx/bio}/ebi/ebi_to_df_params.json | 0 .../bio}/pubmed/oozie_app/config-default.xml | 0 .../dhp/sx/bio}/pubmed/oozie_app/workflow.xml | 13 ++- .../dnetllib/dhp/sx/bio}/BioScholixTest.scala | 15 ++-- .../dnetlib/dhp/sx/graph/bio/crossref_links | 0 .../eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz | Bin .../eu/dnetlib/dhp/sx/graph/bio}/ls_result | 0 .../eu/dnetlib/dhp/sx/graph/bio/pdb_dump | 0 .../eu/dnetlib/dhp/sx/graph/bio}/pubmed.xml | 0 .../eu/dnetlib/dhp/sx/graph/bio}/pubmed_dump | 0 .../dnetlib/dhp/sx/graph/bio/scholix_resolved | 0 .../eu/dnetlib/dhp/sx/graph/bio/uniprot_dump | 0 .../eu/dnetlib/dhp/PropagationConstant.java | 11 ++- .../SparkOrcidToResultFromSemRelJob.java | 7 +- ...kResultToCommunityFromOrganizationJob.java | 4 +- ...parkResultToCommunityThroughSemRelJob.java | 4 +- .../SparkGeneratePanagaeaDataset.scala | 1 - .../provision/IndexRecordTransformerTest.java | 11 +-- 32 files changed, 155 insertions(+), 160 deletions(-) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/BioDBToOAF.scala (83%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/SparkTransformBioDatabaseToOAF.scala (86%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio}/ebi/SparkCreateBaselineDataFrame.scala (76%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio}/ebi/SparkDownloadEBILinks.scala (70%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio}/ebi/SparkEBILinksToOaf.scala (67%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PMArticle.java (97%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PMAuthor.java (92%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PMGrant.java (93%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PMJournal.java (94%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PMParser.scala (99%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PMSubject.java (94%) rename dhp-workflows/{dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx}/bio/pubmed/PubMedToOaf.scala (93%) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio}/ebi/baseline_to_oaf_params.json (100%) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio}/ebi/ebi_download_update.json (100%) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio}/ebi/ebi_to_df_params.json (100%) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio}/pubmed/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio}/pubmed/oozie_app/workflow.xml (78%) rename dhp-workflows/{dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed => dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio}/BioScholixTest.scala (92%) rename dhp-workflows/{dhp-graph-mapper => dhp-aggregation}/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/crossref_links (100%) rename dhp-workflows/{dhp-graph-mapper => dhp-aggregation}/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz (100%) rename dhp-workflows/{dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed => dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio}/ls_result (100%) rename dhp-workflows/{dhp-graph-mapper => dhp-aggregation}/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump (100%) rename dhp-workflows/{dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed => dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio}/pubmed.xml (100%) rename dhp-workflows/{dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed => dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio}/pubmed_dump (100%) rename dhp-workflows/{dhp-graph-mapper => dhp-aggregation}/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved (100%) rename dhp-workflows/{dhp-graph-mapper => dhp-aggregation}/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump (100%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/BioDBToOAF.scala similarity index 83% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/BioDBToOAF.scala index 90b65c8f7..dffc88c6c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/BioDBToOAF.scala @@ -1,14 +1,12 @@ -package eu.dnetlib.dhp.sx.graph.bio +package eu.dnetllib.dhp.sx.bio import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils} -import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf._ import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.{compact, parse, render} - -import scala.collection.JavaConverters._ - +import collection.JavaConverters._ object BioDBToOAF { case class EBILinkItem(id: Long, links: String) {} @@ -17,23 +15,23 @@ object BioDBToOAF { case class UniprotDate(date: String, date_info: String) {} - case class ScholixResolved(pid:String, pidType:String, typology:String, tilte:List[String], datasource:List[String], date:List[String], authors:List[String]){} + case class ScholixResolved(pid: String, pidType: String, typology: String, tilte: List[String], datasource: List[String], date: List[String], authors: List[String]) {} val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9") val SUBJ_CLASS = "Keywords" val DATE_RELATION_KEY = "RelationDate" - val resolvedURL:Map[String,String] = Map( - "genbank"-> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-wgs" -> "https://www.ncbi.nlm.nih.gov/nuccore/", - "ncbi-p" -> "https://www.ncbi.nlm.nih.gov/protein/", - "ena" -> "https://www.ebi.ac.uk/ena/browser/view/", - "clinicaltrials.gov"-> "https://clinicaltrials.gov/ct2/show/", - "onim"-> "https://omim.org/entry/", - "refseq"-> "https://www.ncbi.nlm.nih.gov/nuccore/", - "geo"-> "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" + val resolvedURL: Map[String, String] = Map( + "genbank" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-n" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-wgs" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "ncbi-p" -> "https://www.ncbi.nlm.nih.gov/protein/", + "ena" -> "https://www.ebi.ac.uk/ena/browser/view/", + "clinicaltrials.gov" -> "https://clinicaltrials.gov/ct2/show/", + "onim" -> "https://omim.org/entry/", + "refseq" -> "https://www.ncbi.nlm.nih.gov/nuccore/", + "geo" -> "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" ) @@ -45,7 +43,7 @@ object BioDBToOAF { val ElsevierCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier") val springerNatureCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", "Springer Nature") val EBICollectedFrom: KeyValue = OafMapperUtils.keyValue("10|opendoar____::83e60e09c222f206c725385f53d7e567c", "EMBL-EBIs Protein Data Bank in Europe (PDBe)") - val pubmedCollectedFrom:KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") + val pubmedCollectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") UNIPROTCollectedFrom.setDataInfo(DATA_INFO) PDBCollectedFrom.setDataInfo(DATA_INFO) @@ -58,9 +56,9 @@ object BioDBToOAF { Map( "uniprot" -> UNIPROTCollectedFrom, - "pdb"-> PDBCollectedFrom, - "elsevier" ->ElsevierCollectedFrom, - "ebi" ->EBICollectedFrom, + "pdb" -> PDBCollectedFrom, + "elsevier" -> ElsevierCollectedFrom, + "ebi" -> EBICollectedFrom, "Springer Nature" -> springerNatureCollectedFrom, "NCBI Nucleotide" -> ncbiCollectedFrom, "European Nucleotide Archive" -> enaCollectedFrom, @@ -68,7 +66,7 @@ object BioDBToOAF { ) } - def crossrefLinksToOaf(input:String):Oaf = { + def crossrefLinksToOaf(input: String): Oaf = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) val source_pid = (json \ "Source" \ "Identifier" \ "ID").extract[String].toLowerCase @@ -77,16 +75,16 @@ object BioDBToOAF { val target_pid = (json \ "Target" \ "Identifier" \ "ID").extract[String].toLowerCase val target_pid_type = (json \ "Target" \ "Identifier" \ "IDScheme").extract[String].toLowerCase - val relation_semantic= (json \ "RelationshipType" \ "Name").extract[String] + val relation_semantic = (json \ "RelationshipType" \ "Name").extract[String] val date = GraphCleaningFunctions.cleanDate((json \ "LinkedPublicationDate").extract[String]) - createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type),collectedFromMap("elsevier"),"relationship", relation_semantic, date) + createRelation(target_pid, target_pid_type, generate_unresolved_id(source_pid, source_pid_type), collectedFromMap("elsevier"), "relationship", relation_semantic, date) } - def scholixResolvedToOAF(input:ScholixResolved):Oaf = { + def scholixResolvedToOAF(input: ScholixResolved): Oaf = { val d = new Dataset @@ -127,18 +125,18 @@ object BioDBToOAF { d.setInstance(List(i).asJava) if (input.authors != null && input.authors.nonEmpty) { - val authors = input.authors.map(a =>{ + val authors = input.authors.map(a => { val authorOAF = new Author authorOAF.setFullname(a) authorOAF }) d.setAuthor(authors.asJava) } - if (input.date!= null && input.date.nonEmpty) { - val dt = input.date.head - i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) - d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) - } + if (input.date != null && input.date.nonEmpty) { + val dt = input.date.head + i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) + d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) + } d } @@ -190,7 +188,7 @@ object BioDBToOAF { OafMapperUtils.structuredProperty(s, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, null) ).asJava) } - var i_date:Option[UniprotDate] = None + var i_date: Option[UniprotDate] = None if (dates.nonEmpty) { i_date = dates.find(d => d.date_info.contains("entry version")) @@ -218,12 +216,12 @@ object BioDBToOAF { if (references_pmid != null && references_pmid.nonEmpty) { - val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) rel.getCollectedfrom List(d, rel) } else if (references_doi != null && references_doi.nonEmpty) { - val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) List(d, rel) } else @@ -231,13 +229,12 @@ object BioDBToOAF { } - - def generate_unresolved_id(pid:String, pidType:String) :String = { + def generate_unresolved_id(pid: String, pidType: String): String = { s"unresolved::$pid::$pidType" } - def createRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, subRelType:String, relClass:String, date:String):Relation = { + def createRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, subRelType: String, relClass: String, date: String): Relation = { val rel = new Relation rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) @@ -251,7 +248,7 @@ object BioDBToOAF { rel.setTarget(s"unresolved::$pid::$pidType") - val dateProps:KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) + val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) rel.setProperties(List(dateProps).asJava) @@ -262,8 +259,8 @@ object BioDBToOAF { } - def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date:String): Relation = { - createRelation(pid,pidType,sourceId,collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) + def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date: String): Relation = { + createRelation(pid, pidType, sourceId, collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) } @@ -338,7 +335,7 @@ object BioDBToOAF { def EBITargetLinksFilter(input: EBILinks): Boolean = { - input.targetPidType.equalsIgnoreCase("ena") || input.targetPidType.equalsIgnoreCase("pdb") || input.targetPidType.equalsIgnoreCase("uniprot") + input.targetPidType.equalsIgnoreCase("ena") || input.targetPidType.equalsIgnoreCase("pdb") || input.targetPidType.equalsIgnoreCase("uniprot") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 86% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index d66cc84ec..16d2b25a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,8 +1,8 @@ -package eu.dnetlib.dhp.sx.graph.bio +package eu.dnetllib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} -import BioDBToOAF.ScholixResolved +import eu.dnetlib.dhp.schema.oaf.Oaf +import eu.dnetllib.dhp.sx.bio.BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -31,17 +31,16 @@ object SparkTransformBioDatabaseToOAF { .master(parser.get("master")).getOrCreate() val sc = spark.sparkContext - implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - import spark.implicits._ - + implicit val resultEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + import spark.implicits._ database.toUpperCase() match { case "UNIPROT" => spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.uniprotToOAF(i))).write.mode(SaveMode.Overwrite).save(targetPath) - case "PDB"=> + case "PDB" => spark.createDataset(sc.textFile(dbPath).flatMap(i => BioDBToOAF.pdbTOOaf(i))).write.mode(SaveMode.Overwrite).save(targetPath) case "SCHOLIX" => spark.read.load(dbPath).as[ScholixResolved].map(i => BioDBToOAF.scholixResolvedToOAF(i)).write.mode(SaveMode.Overwrite).save(targetPath) - case "CROSSREF_LINKS"=> + case "CROSSREF_LINKS" => spark.createDataset(sc.textFile(dbPath).map(i => BioDBToOAF.crossrefLinksToOaf(i))).write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index ee76cf8ad..97b3cdc99 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -1,10 +1,10 @@ -package eu.dnetlib.dhp.sx.graph.ebi +package eu.dnetllib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result -import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory +import eu.dnetllib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} @@ -24,24 +24,24 @@ import scala.xml.pull.XMLEventReader object SparkCreateBaselineDataFrame { - def requestBaseLineUpdatePage(maxFile:String):List[(String,String)] = { - val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") + def requestBaseLineUpdatePage(maxFile: String): List[(String, String)] = { + val data = requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - val result =data.lines.filter(l => l.startsWith("") val start = l.indexOf("= 0 && end >start) - l.substring(start+9, (end-start)) + if (start >= 0 && end > start) + l.substring(start + 9, (end - start)) else "" - }.filter(s =>s.endsWith(".gz") ).filter(s => s > maxFile).map(s => (s,s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList + }.filter(s => s.endsWith(".gz")).filter(s => s > maxFile).map(s => (s, s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList result } - def downloadBaselinePart(url:String):InputStream = { + def downloadBaselinePart(url: String): InputStream = { val r = new HttpGet(url) val timeout = 60; // seconds val config = RequestConfig.custom() @@ -55,7 +55,7 @@ object SparkCreateBaselineDataFrame { } - def requestPage(url:String):String = { + def requestPage(url: String): String = { val r = new HttpGet(url) val timeout = 60; // seconds val config = RequestConfig.custom() @@ -90,25 +90,21 @@ object SparkCreateBaselineDataFrame { } - - - - - def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { + def downloadBaseLineUpdate(baselinePath: String, hdfsServerUri: String): Unit = { val conf = new Configuration conf.set("fs.defaultFS", hdfsServerUri) - val fs = FileSystem.get(conf) + val fs = FileSystem.get(conf) val p = new Path(baselinePath) - val files = fs.listFiles(p,false) + val files = fs.listFiles(p, false) var max_file = "" while (files.hasNext) { val c = files.next() val data = c.getPath.toString - val fileName = data.substring(data.lastIndexOf("/")+1) + val fileName = data.substring(data.lastIndexOf("/") + 1) - if (fileName> max_file) + if (fileName > max_file) max_file = fileName } @@ -119,7 +115,7 @@ object SparkCreateBaselineDataFrame { val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true) val i = downloadBaselinePart(u._2) val buffer = Array.fill[Byte](1024)(0) - while(i.read(buffer)>0) { + while (i.read(buffer) > 0) { fsDataOutputStream.write(buffer) } i.close() @@ -134,11 +130,11 @@ object SparkCreateBaselineDataFrame { override def zero: PMArticle = new PMArticle override def reduce(b: PMArticle, a: (String, PMArticle)): PMArticle = { - if (b != null && b.getPmid!= null) b else a._2 + if (b != null && b.getPmid != null) b else a._2 } override def merge(b1: PMArticle, b2: PMArticle): PMArticle = { - if (b1 != null && b1.getPmid!= null) b1 else b2 + if (b1 != null && b1.getPmid != null) b1 else b2 } @@ -153,7 +149,7 @@ object SparkCreateBaselineDataFrame { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val log: Logger = LoggerFactory.getLogger(getClass) - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkEBILinksToOaf.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkEBILinksToOaf.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json"))) parser.parseArgument(args) val isLookupUrl: String = parser.get("isLookupUrl") log.info("isLookupUrl: {}", isLookupUrl) @@ -175,32 +171,32 @@ object SparkCreateBaselineDataFrame { .config(conf) .appName(SparkEBILinksToOaf.getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - import spark.implicits._ val sc = spark.sparkContext + import spark.implicits._ - implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) - implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) - implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) - implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) + implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) + implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) + implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) - val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline",2000) - val ds:Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i =>{ + val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline_ftp", 2000) + val ds: Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i => { val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) new PMParser(xml) - } )) + })) - ds.map(p => (p.getPmid,p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1) + ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1) .agg(pmArticleAggregator.toColumn) .map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset") val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle] exported_dataset .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] - .filter(p => p!= null) + .filter(p => p != null) .write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 70% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index e940fdff0..578db1ea9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,8 +1,8 @@ -package eu.dnetlib.dhp.sx.graph.ebi +package eu.dnetllib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem -import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import eu.dnetllib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import eu.dnetllib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet @@ -14,15 +14,15 @@ import org.slf4j.{Logger, LoggerFactory} object SparkDownloadEBILinks { - def createEBILinks(pmid:Long):EBILinkItem = { + def createEBILinks(pmid: Long): EBILinkItem = { val res = requestLinks(pmid) - if (res!=null) + if (res != null) return EBILinkItem(pmid, res) null } - def requestPage(url:String):String = { + def requestPage(url: String): String = { val r = new HttpGet(url) val timeout = 60; // seconds val config = RequestConfig.custom() @@ -56,10 +56,11 @@ object SparkDownloadEBILinks { } } - def requestLinks(PMID:Long):String = { + def requestLinks(PMID: Long): String = { requestPage(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") } + def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) @@ -76,9 +77,9 @@ object SparkDownloadEBILinks { import spark.implicits._ - implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) - implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) - implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) + implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) + implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) + implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") @@ -86,29 +87,29 @@ object SparkDownloadEBILinks { log.info(s"workingPath -> $workingPath") log.info("Getting max pubmedId where the links have been requested") - val links:Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] - val lastPMIDRequested =links.map(l => l.id).select(max("value")).first.getLong(0) + val links: Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] + val lastPMIDRequested = links.map(l => l.id).select(max("value")).first.getLong(0) log.info("Retrieving PMID to request links") val pubmed = spark.read.load(s"$sourcePath/baseline_dataset").as[PMArticle] pubmed.map(p => p.getPmid.toLong).where(s"value > $lastPMIDRequested").write.mode(SaveMode.Overwrite).save(s"$workingPath/id_to_request") - val pmidToReq:Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] + val pmidToReq: Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] val total = pmidToReq.count() - spark.createDataset(pmidToReq.rdd.repartition((total/MAX_ITEM_PER_PARTITION).toInt).map(pmid =>createEBILinks(pmid)).filter(l => l!= null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") + spark.createDataset(pmidToReq.rdd.repartition((total / MAX_ITEM_PER_PARTITION).toInt).map(pmid => createEBILinks(pmid)).filter(l => l != null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") - val updates:Dataset[EBILinkItem] =spark.read.load(s"$workingPath/links_update").as[EBILinkItem] + val updates: Dataset[EBILinkItem] = spark.read.load(s"$workingPath/links_update").as[EBILinkItem] links.union(updates).groupByKey(_.id) - .reduceGroups{(x,y) => - if (x == null || x.links ==null) + .reduceGroups { (x, y) => + if (x == null || x.links == null) y - if (y ==null || y.links ==null) + if (y == null || y.links == null) x if (x.links.length > y.links.length) - x + x else y }.map(_._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_final") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 67% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 1924d919e..0db469769 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,15 +1,14 @@ -package eu.dnetlib.dhp.sx.graph.ebi +package eu.dnetllib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetlib.dhp.sx.graph.bio -import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF -import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem +import eu.dnetllib.dhp.sx.bio.BioDBToOAF +import eu.dnetllib.dhp.sx.bio.BioDBToOAF.EBILinkItem import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} + object SparkEBILinksToOaf { def main(args: Array[String]): Unit = { @@ -24,17 +23,17 @@ object SparkEBILinksToOaf { .appName(SparkEBILinksToOaf.getClass.getSimpleName) .master(parser.get("master")).getOrCreate() + + import spark.implicits._ val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") + implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - import spark.implicits._ - implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) + val ebLinks: Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links != null) - val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null) - - ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links)) + ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) .write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMArticle.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMArticle.java similarity index 97% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMArticle.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMArticle.java index 211cbcffb..305bb89be 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMArticle.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMArticle.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed; +package eu.dnetllib.dhp.sx.bio.pubmed; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMAuthor.java similarity index 92% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMAuthor.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMAuthor.java index ba69998c5..c89929981 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMAuthor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMAuthor.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed; +package eu.dnetllib.dhp.sx.bio.pubmed; import java.io.Serializable; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMGrant.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMGrant.java similarity index 93% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMGrant.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMGrant.java index 0c3fd4601..7df5dd5f2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMGrant.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMGrant.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed; +package eu.dnetllib.dhp.sx.bio.pubmed; public class PMGrant { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMJournal.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMJournal.java similarity index 94% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMJournal.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMJournal.java index d251354d4..6065416f8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMJournal.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMJournal.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed; +package eu.dnetllib.dhp.sx.bio.pubmed; import java.io.Serializable; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMParser.scala similarity index 99% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMParser.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMParser.scala index 8744bdfb4..8fa226b7d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMParser.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMParser.scala @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed +package eu.dnetllib.dhp.sx.bio.pubmed import scala.xml.MetaData import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMSubject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMSubject.java similarity index 94% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMSubject.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMSubject.java index 354b2cbe5..e6ab61b87 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PMSubject.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMSubject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed; +package eu.dnetllib.dhp.sx.bio.pubmed; public class PMSubject { private String value; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 93% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 202eb7b14..a1777a230 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -1,11 +1,12 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed +package eu.dnetllib.dhp.sx.bio.pubmed + import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.oaf._ +import scala.collection.JavaConverters._ import java.util.regex.Pattern -import scala.collection.JavaConverters._ object PubMedToOaf { @@ -15,7 +16,7 @@ object PubMedToOaf { "doi" -> "https://dx.doi.org/" ) - def cleanDoi(doi:String):String = { + def cleanDoi(doi: String): String = { val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" @@ -71,14 +72,14 @@ object PubMedToOaf { if (article.getPublicationTypes == null) return null val i = new Instance - var pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) + val pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) if (pidList == null) return null - var alternateIdentifier :StructuredProperty = null + var alternateIdentifier: StructuredProperty = null if (article.getDoi != null) { val normalizedPid = cleanDoi(article.getDoi) - if (normalizedPid!= null) + if (normalizedPid != null) alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) } @@ -102,10 +103,10 @@ object PubMedToOaf { return result result.setDataInfo(dataInfo) i.setPid(pidList.asJava) - if (alternateIdentifier!= null) + if (alternateIdentifier != null) i.setAlternateIdentifier(List(alternateIdentifier).asJava) result.setInstance(List(i).asJava) - i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut) + i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut) val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) .filter(t => t._1.nonEmpty) @@ -136,7 +137,7 @@ object PubMedToOaf { } - val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection breakOut) + val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s => OafMapperUtils.structuredProperty(s.getValue, SUBJ_CLASS, SUBJ_CLASS, ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES, dataInfo))(collection.breakOut) if (subjects != null) result.setSubject(subjects.asJava) @@ -148,7 +149,7 @@ object PubMedToOaf { author.setFullname(a.getFullName) author.setRank(index + 1) author - }(collection breakOut) + }(collection.breakOut) if (authors != null && authors.nonEmpty) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_to_df_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_to_df_params.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/pubmed/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/pubmed/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml similarity index 78% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/pubmed/oozie_app/workflow.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 914d1c2c7..f5a98ba5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -1,13 +1,9 @@ - + baselineWorkingPath the Baseline Working Path - - targetPath - the Target Path - isLookupUrl The IS lookUp service endopoint @@ -24,8 +20,8 @@ yarn cluster - Convert Baseline to Dataset - eu.dnetlib.dhp.sx.graph.ebi.SparkCreateBaselineDataFrame + Convert Baseline to OAF Dataset + eu.dnetllib.dhp.sx.bio.ebi.SparkCreateBaselineDataFrame dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -38,9 +34,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --workingPath${baselineWorkingPath} - --targetPath${targetPath} + --targetPath${baselineWorkingPath}/transformed --masteryarn --isLookupUrl${isLookupUrl} + --hdfsServerUri${nameNode} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio/BioScholixTest.scala similarity index 92% rename from dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio/BioScholixTest.scala index 87279eb21..c072f149c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio/BioScholixTest.scala @@ -1,13 +1,10 @@ -package eu.dnetlib.dhp.sx.graph.bio.pubmed +package eu.dnetllib.dhp.sx.bio import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} -import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, OafMapperUtils, PidType} +import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved -import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF -import eu.dnetlib.dhp.sx.graph.bio.pubmed.PubMedToOaf.dataInfo -import eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks +import eu.dnetllib.dhp.sx.bio.BioDBToOAF.ScholixResolved +import eu.dnetllib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf} import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -55,7 +52,7 @@ class BioScholixTest extends AbstractVocabularyTest{ @Test def testEBIData() = { - val inputXML = Source.fromInputStream(getClass.getResourceAsStream("pubmed.xml")).mkString + val inputXML = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")).mkString val xml = new XMLEventReader(Source.fromBytes(inputXML.getBytes())) new PMParser(xml).foreach(s =>println(mapper.writeValueAsString(s))) } @@ -65,7 +62,7 @@ class BioScholixTest extends AbstractVocabularyTest{ def testPubmedToOaf(): Unit = { assertNotNull(vocabularies) assertTrue(vocabularies.vocabularyExists("dnet:publication_resource")) - val records:String =Source.fromInputStream(getClass.getResourceAsStream("pubmed_dump")).mkString + val records:String =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump")).mkString val r:List[Oaf] = records.lines.toList.map(s=>mapper.readValue(s, classOf[PMArticle])).map(a => PubMedToOaf.convert(a, vocabularies)) assertEquals(10, r.size) assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p))) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/crossref_links b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/crossref_links similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/crossref_links rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/crossref_links diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ebi_links.gz diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ls_result similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/ls_result diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pdb_dump diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed_dump b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed_dump rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed_dump diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/scholix_resolved diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/uniprot_dump diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0d7c74475..23e97a97a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -69,7 +69,7 @@ public class PropagationConstant { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } @@ -84,7 +84,8 @@ public class PropagationConstant { return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, + String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); @@ -108,7 +109,11 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); + r + .setDataInfo( + getDataInfo( + inference_provenance, inference_class_id, inference_class_name, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 68949b900..a38b4da2e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,14 +173,17 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); + p + .setQualifier( + getQualifier( + ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS)); + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 1289ff644..50df08f8c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -22,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -130,7 +130,7 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 7f76ead94..f31a26230 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -20,6 +19,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; @@ -126,7 +126,7 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, - ModelConstants.DNET_PROVENANCE_ACTIONS))); + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala index bf726cf59..79c75d6df 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/pangaea/SparkGeneratePanagaeaDataset.scala @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.sx.graph.pangaea import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.sx.graph.ebi.SparkEBILinksToOaf import org.apache.spark.rdd.RDD import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 1c7dce3f2..64935e79d 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -84,13 +84,15 @@ public class IndexRecordTransformerTest { @Test public void testForEOSCFutureTraining() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); testRecordTransformation(record); } @Test public void testForEOSCFutureAirQualityCopernicus() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); testRecordTransformation(record); } @@ -102,12 +104,11 @@ public class IndexRecordTransformerTest { @Test public void testForEOSCFutureB2SharePlotRelatedORP() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); testRecordTransformation(record); } - - private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); From 83f51f1812212cd7d5587cf6e5597945ae619b5c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 12 Oct 2021 09:14:43 +0200 Subject: [PATCH 270/287] refactoring --- .../dump/complete/DumpOrganizationProjectDatasourceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java index 051141e18..80b5fd89e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java @@ -6,7 +6,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; -import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -23,6 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Project; From 511da98d0ce3c4f7c046799d18f7398367c755fa Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 12 Oct 2021 11:47:49 +0200 Subject: [PATCH 271/287] - fixed bug on download pmc Article - removed unused line of code in SparkCreateActionset --- .../dhp/actionmanager/scholix/SparkCreateActionset.scala | 4 ---- .../dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala | 8 ++------ .../eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml | 8 ++++++-- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala index b78f411ee..7a87861db 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala @@ -60,14 +60,10 @@ object SparkCreateActionset { val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) - - entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) entities .joinWith(idRelation, entities("_1").equalTo(idRelation("value"))) .map(p => p._1._2) .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - - } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 97b3cdc99..2fc9623a8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -114,11 +114,7 @@ object SparkCreateBaselineDataFrame { val hdfsWritePath: Path = new Path(s"$baselinePath/${u._1}") val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true) val i = downloadBaselinePart(u._2) - val buffer = Array.fill[Byte](1024)(0) - while (i.read(buffer) > 0) { - fsDataOutputStream.write(buffer) - } - i.close() + IOUtils.copy(i, fsDataOutputStream) println(s"Downloaded ${u._2} into $baselinePath/${u._1}") fsDataOutputStream.close() } @@ -182,7 +178,7 @@ object SparkCreateBaselineDataFrame { downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) - val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline_ftp", 2000) + val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000) val ds: Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i => { val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) new PMParser(xml) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index f5a98ba5e..4ed6dd8bf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -8,6 +8,10 @@ isLookupUrl The IS lookUp service endopoint + + targetPath + The target path + @@ -22,7 +26,7 @@ cluster Convert Baseline to OAF Dataset eu.dnetllib.dhp.sx.bio.ebi.SparkCreateBaselineDataFrame - dhp-graph-mapper-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -34,7 +38,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --workingPath${baselineWorkingPath} - --targetPath${baselineWorkingPath}/transformed + --targetPath${targetPath} --masteryarn --isLookupUrl${isLookupUrl} --hdfsServerUri${nameNode} From 914b3e92cb2301fb86a8b5c1942cc01a9cb90d0f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 12 Oct 2021 12:00:45 +0200 Subject: [PATCH 272/287] updating graph schema module dependency to version 2.8.20 to include organization parent/child relation constants --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 61b0ad873..3e8d6bc19 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.18] + [2.8.20] [4.0.3] [6.0.5] [3.1.6] From 7387416e909bad95b95b2303f5b6bdff76e47749 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 12 Oct 2021 12:36:30 +0200 Subject: [PATCH 273/287] added params skip update to direct transform in OAF, this should be set to true in production --- .../ebi/SparkCreateBaselineDataFrame.scala | 28 ++++++++++--------- .../sx/bio/ebi/baseline_to_oaf_params.json | 1 + .../dhp/sx/bio/pubmed/oozie_app/workflow.xml | 6 ++++ 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 2fc9623a8..17bf3fa6b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -32,7 +32,7 @@ object SparkCreateBaselineDataFrame { val start = l.indexOf("= 0 && end > start) - l.substring(start + 9, (end - start)) + l.substring(start + 9, end - start) else "" }.filter(s => s.endsWith(".gz")).filter(s => s > maxFile).map(s => (s, s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList @@ -158,6 +158,9 @@ object SparkCreateBaselineDataFrame { val hdfsServerUri = parser.get("hdfsServerUri") log.info("hdfsServerUri: {}", targetPath) + val skipUpdate = parser.get("skipUpdate") + log.info("skipUpdate: {}", skipUpdate) + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) @@ -176,18 +179,17 @@ object SparkCreateBaselineDataFrame { implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) - downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) - - val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000) - val ds: Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i => { - val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) - new PMParser(xml) - - })) - - ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1) - .agg(pmArticleAggregator.toColumn) - .map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset") + if (!"true".equalsIgnoreCase(skipUpdate)) { + downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) + val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000) + val ds: Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i => { + val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) + new PMParser(xml) + })) + ds.map(p => (p.getPmid, p))(Encoders.tuple(Encoders.STRING, PMEncoder)).groupByKey(_._1) + .agg(pmArticleAggregator.toColumn) + .map(p => p._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset") + } val exported_dataset = spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle] exported_dataset diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json index 4bee770bd..8dc8a2aae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json @@ -3,5 +3,6 @@ {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"s", "paramLongName":"skipUpdate", "paramDescription": "skip update ", "paramRequired": false}, {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 4ed6dd8bf..21fd2d153 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -12,6 +12,11 @@ targetPath The target path + + skipUpdate + false + The request block size + @@ -42,6 +47,7 @@ --masteryarn --isLookupUrl${isLookupUrl} --hdfsServerUri${nameNode} + --skipUpdate${skipUpdate} From 63933808d4e89b683f06036c78170e4be37d36a2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 13 Oct 2021 11:28:28 +0200 Subject: [PATCH 274/287] added fix for mixing result types, added configuration default to funder subworkflow --- .../dhp/oa/graph/dump/DumpProducts.java | 3 +- .../oa/graph/dump/QueryInformationSystem.java | 2 +- .../dhp/oa/graph/dump/ResultMapper.java | 682 +++++++++--------- .../dhp/oa/graph/dump/SaveCommunityMap.java | 8 +- .../oa/graph/dump/input_cm_parameters.json | 2 +- .../funder/oozie_app/config-default.xml | 30 + 6 files changed, 389 insertions(+), 338 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index 68e1e8402..a714cea20 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -11,6 +11,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -57,7 +58,7 @@ public class DumpProducts implements Serializable { Utils .readPath(spark, inputPath, inputClazz) .map((MapFunction) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz)) - .filter(Objects::nonNull) + .filter((FilterFunction) value -> value != null) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index b972de6e9..ab5434ed0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -62,7 +62,7 @@ public class QueryInformationSystem { for (String xml : communityMap) { final Document doc; final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + // reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 0f3192c25..49468540d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -40,355 +40,371 @@ public class ResultMapper implements Serializable { eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; Optional ort = Optional.ofNullable(input.getResulttype()); if (ort.isPresent()) { - switch (ort.get().getClassid()) { - case "publication": - Optional journal = Optional - .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); - if (journal.isPresent()) { - Journal j = journal.get(); - Container c = new Container(); - c.setConferencedate(j.getConferencedate()); - c.setConferenceplace(j.getConferenceplace()); - c.setEdition(j.getEdition()); - c.setEp(j.getEp()); - c.setIss(j.getIss()); - c.setIssnLinking(j.getIssnLinking()); - c.setIssnOnline(j.getIssnOnline()); - c.setIssnPrinted(j.getIssnPrinted()); - c.setName(j.getName()); - c.setSp(j.getSp()); - c.setVol(j.getVol()); - out.setContainer(c); - out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); - } - break; - case "dataset": - eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; - Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); - Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); + try { - out - .setGeolocation( - Optional - .ofNullable(id.getGeolocation()) - .map( - igl -> igl - .stream() - .filter(Objects::nonNull) - .map(gli -> { - GeoLocation gl = new GeoLocation(); - gl.setBox(gli.getBox()); - gl.setPlace(gli.getPlace()); - gl.setPoint(gli.getPoint()); - return gl; - }) - .collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); - break; - case "software": - - eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; - Optional - .ofNullable(is.getCodeRepositoryUrl()) - .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); - Optional - .ofNullable(is.getDocumentationUrl()) - .ifPresent( - value -> out - .setDocumentationUrl( - value - .stream() - .map(Field::getValue) - .collect(Collectors.toList()))); - - Optional - .ofNullable(is.getProgrammingLanguage()) - .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); - - out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); - break; - case "other": - - eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; - out - .setContactgroup( - Optional - .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out - .setContactperson( - Optional - .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - out - .setTool( - Optional - .ofNullable(ir.getTool()) - .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) - .orElse(null)); - - out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); - - break; - default: - throw new NoAvailableEntityTypeException(); - } - - Optional> mes = Optional.ofNullable(input.getMeasures()); - if (mes.isPresent()) { - List measure = new ArrayList<>(); - mes - .get() - .forEach( - m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); - out.setMeasures(measure); - } - - Optional - .ofNullable(input.getAuthor()) - .ifPresent( - ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); - - // I do not map Access Right UNKNOWN or OTHER - - Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); - out - .setBestaccessright( - AccessRight - .newInstance( - code, - Constants.coarCodeLabelMap.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - - final List contributorList = new ArrayList<>(); - Optional - .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); - out.setContributor(contributorList); - - Optional - .ofNullable(input.getCountry()) - .ifPresent( - value -> out - .setCountry( - value - .stream() - .map( - c -> { - if (c.getClassid().equals((ModelConstants.UNKNOWN))) { - return null; - } - Country country = new Country(); - country.setCode(c.getClassid()); - country.setLabel(c.getClassname()); - Optional - .ofNullable(c.getDataInfo()) - .ifPresent( - provenance -> country - .setProvenance( - Provenance - .newInstance( - provenance - .getProvenanceaction() - .getClassname(), - c.getDataInfo().getTrust()))); - return country; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()))); - - final List coverageList = new ArrayList<>(); - Optional - .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); - out.setCoverage(coverageList); - - out.setDateofcollection(input.getDateofcollection()); - - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setDescription(descriptionList); - Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); - } - - final List formatList = new ArrayList<>(); - Optional - .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); - out.setFormat(formatList); - out.setId(input.getId()); - out.setOriginalId(input.getOriginalId()); - - Optional> oInst = Optional - .ofNullable(input.getInstance()); - - if (oInst.isPresent()) { - if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { - ((GraphResult) out) - .setInstance( - oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); - } else { - ((CommunityResult) out) - .setInstance( - oInst.get().stream().map(ResultMapper::getCommunityInstance).collect(Collectors.toList())); - } - } - - Optional oL = Optional.ofNullable(input.getLanguage()); - if (oL.isPresent()) { - eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); - } - Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); - if (oLong.isPresent()) { - out.setLastupdatetimestamp(oLong.get()); - } - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setMaintitle(iTitle.get(0).getValue()); + addTypeSpecificInformation(out, input, ort); + Optional> mes = Optional.ofNullable(input.getMeasures()); + if (mes.isPresent()) { + List measure = new ArrayList<>(); + mes + .get() + .forEach( + m -> m.getUnit().forEach(u -> measure.add(KeyValue.newInstance(m.getId(), u.getValue())))); + out.setMeasures(measure); } - iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setSubtitle(iTitle.get(0).getValue()); + Optional + .ofNullable(input.getAuthor()) + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); + + // I do not map Access Right UNKNOWN or OTHER + + Optional oar = Optional.ofNullable(input.getBestaccessright()); + if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { + String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); + out + .setBestaccessright( + AccessRight + .newInstance( + code, + Constants.coarCodeLabelMap.get(code), + Constants.COAR_ACCESS_RIGHT_SCHEMA)); } - } + final List contributorList = new ArrayList<>(); + Optional + .ofNullable(input.getContributor()) + .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); + out.setContributor(contributorList); - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> out - .setPid( - value - .stream() - .map( - p -> ControlledField - .newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); - - oStr = Optional.ofNullable(input.getDateofacceptance()); - if (oStr.isPresent()) { - out.setPublicationdate(oStr.get().getValue()); - } - oStr = Optional.ofNullable(input.getPublisher()); - if (oStr.isPresent()) { - out.setPublisher(oStr.get().getValue()); - } - - Optional - .ofNullable(input.getSource()) - .ifPresent(value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); - - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); - - out.setSubjects(subjectList); - - out.setType(input.getResulttype().getClassid()); - } - - if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { - ((CommunityResult) out) - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - - Set communities = communityMap.keySet(); - List contextList = Optional - .ofNullable( - input - .getContext()) - .map( - value -> value - .stream() - .map(c -> { - String communityId = c.getId(); - if (communityId.contains("::")) { - communityId = communityId.substring(0, communityId.indexOf("::")); - } - if (communities.contains(communityId)) { - Context context = new Context(); - context.setCode(communityId); - context.setLabel(communityMap.get(communityId)); - Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); - if (dataInfo.isPresent()) { - List provenance = new ArrayList<>(); - provenance - .addAll( - dataInfo - .get() - .stream() - .map( - di -> Optional - .ofNullable(di.getProvenanceaction()) - .map( - provenanceaction -> Provenance + Optional + .ofNullable(input.getCountry()) + .ifPresent( + value -> out + .setCountry( + value + .stream() + .map( + c -> { + if (c.getClassid().equals((ModelConstants.UNKNOWN))) { + return null; + } + Country country = new Country(); + country.setCode(c.getClassid()); + country.setLabel(c.getClassname()); + Optional + .ofNullable(c.getDataInfo()) + .ifPresent( + provenance -> country + .setProvenance( + Provenance .newInstance( - provenanceaction.getClassname(), di.getTrust())) - .orElse(null)) - .filter(Objects::nonNull) - .collect(Collectors.toSet())); + provenance + .getProvenanceaction() + .getClassname(), + c.getDataInfo().getTrust()))); + return country; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()))); - try { - context.setProvenance(getUniqueProvenance(provenance)); - } catch (NoAvailableEntityTypeException e) { - e.printStackTrace(); - } - } - return context; - } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList())) - .orElse(new ArrayList<>()); + final List coverageList = new ArrayList<>(); + Optional + .ofNullable(input.getCoverage()) + .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); + out.setCoverage(coverageList); - if (!contextList.isEmpty()) { - Set hashValue = new HashSet<>(); - List remainigContext = new ArrayList<>(); - contextList.forEach(c -> { - if (!hashValue.contains(c.hashCode())) { - remainigContext.add(c); - hashValue.add(c.hashCode()); + out.setDateofcollection(input.getDateofcollection()); + + final List descriptionList = new ArrayList<>(); + Optional + .ofNullable(input.getDescription()) + .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); + out.setDescription(descriptionList); + Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); + if (oStr.isPresent()) { + out.setEmbargoenddate(oStr.get().getValue()); + } + + final List formatList = new ArrayList<>(); + Optional + .ofNullable(input.getFormat()) + .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); + out.setFormat(formatList); + out.setId(input.getId()); + out.setOriginalId(input.getOriginalId()); + + Optional> oInst = Optional + .ofNullable(input.getInstance()); + + if (oInst.isPresent()) { + if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { + ((GraphResult) out) + .setInstance( + oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); + } else { + ((CommunityResult) out) + .setInstance( + oInst + .get() + .stream() + .map(ResultMapper::getCommunityInstance) + .collect(Collectors.toList())); } - }); - ((CommunityResult) out).setContext(remainigContext); + } + + Optional oL = Optional.ofNullable(input.getLanguage()); + if (oL.isPresent()) { + eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); + out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); + } + Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); + if (oLong.isPresent()) { + out.setLastupdatetimestamp(oLong.get()); + } + Optional> otitle = Optional.ofNullable(input.getTitle()); + if (otitle.isPresent()) { + List iTitle = otitle + .get() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) + .collect(Collectors.toList()); + if (!iTitle.isEmpty()) { + out.setMaintitle(iTitle.get(0).getValue()); + } + + iTitle = otitle + .get() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) + .collect(Collectors.toList()); + if (!iTitle.isEmpty()) { + out.setSubtitle(iTitle.get(0).getValue()); + } + + } + + Optional + .ofNullable(input.getPid()) + .ifPresent( + value -> out + .setPid( + value + .stream() + .map( + p -> ControlledField + .newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); + + oStr = Optional.ofNullable(input.getDateofacceptance()); + if (oStr.isPresent()) { + out.setPublicationdate(oStr.get().getValue()); + } + oStr = Optional.ofNullable(input.getPublisher()); + if (oStr.isPresent()) { + out.setPublisher(oStr.get().getValue()); + } + + Optional + .ofNullable(input.getSource()) + .ifPresent( + value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); + + List subjectList = new ArrayList<>(); + Optional + .ofNullable(input.getSubject()) + .ifPresent( + value -> value + .forEach(s -> subjectList.add(getSubject(s)))); + + out.setSubjects(subjectList); + + out.setType(input.getResulttype().getClassid()); + + if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { + ((CommunityResult) out) + .setCollectedfrom( + input + .getCollectedfrom() + .stream() + .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) + .collect(Collectors.toList())); + + Set communities = communityMap.keySet(); + List contextList = Optional + .ofNullable( + input + .getContext()) + .map( + value -> value + .stream() + .map(c -> { + String communityId = c.getId(); + if (communityId.contains("::")) { + communityId = communityId.substring(0, communityId.indexOf("::")); + } + if (communities.contains(communityId)) { + Context context = new Context(); + context.setCode(communityId); + context.setLabel(communityMap.get(communityId)); + Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); + if (dataInfo.isPresent()) { + List provenance = new ArrayList<>(); + provenance + .addAll( + dataInfo + .get() + .stream() + .map( + di -> Optional + .ofNullable(di.getProvenanceaction()) + .map( + provenanceaction -> Provenance + .newInstance( + provenanceaction.getClassname(), + di.getTrust())) + .orElse(null)) + .filter(Objects::nonNull) + .collect(Collectors.toSet())); + + try { + context.setProvenance(getUniqueProvenance(provenance)); + } catch (NoAvailableEntityTypeException e) { + e.printStackTrace(); + } + } + return context; + } + return null; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())) + .orElse(new ArrayList<>()); + + if (!contextList.isEmpty()) { + Set hashValue = new HashSet<>(); + List remainigContext = new ArrayList<>(); + contextList.forEach(c -> { + if (!hashValue.contains(c.hashCode())) { + remainigContext.add(c); + hashValue.add(c.hashCode()); + } + }); + ((CommunityResult) out).setContext(remainigContext); + } + } + } catch (ClassCastException cce) { + return out; } } + return out; } + private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input, + Optional ort) throws NoAvailableEntityTypeException { + switch (ort.get().getClassid()) { + case "publication": + Optional journal = Optional + .ofNullable(((Publication) input).getJournal()); + if (journal.isPresent()) { + Journal j = journal.get(); + Container c = new Container(); + c.setConferencedate(j.getConferencedate()); + c.setConferenceplace(j.getConferenceplace()); + c.setEdition(j.getEdition()); + c.setEp(j.getEp()); + c.setIss(j.getIss()); + c.setIssnLinking(j.getIssnLinking()); + c.setIssnOnline(j.getIssnOnline()); + c.setIssnPrinted(j.getIssnPrinted()); + c.setName(j.getName()); + c.setSp(j.getSp()); + c.setVol(j.getVol()); + out.setContainer(c); + out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); + } + break; + case "dataset": + Dataset id = (Dataset) input; + Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); + Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); + + out + .setGeolocation( + Optional + .ofNullable(id.getGeolocation()) + .map( + igl -> igl + .stream() + .filter(Objects::nonNull) + .map(gli -> { + GeoLocation gl = new GeoLocation(); + gl.setBox(gli.getBox()); + gl.setPlace(gli.getPlace()); + gl.setPoint(gli.getPoint()); + return gl; + }) + .collect(Collectors.toList())) + .orElse(null)); + + out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); + break; + case "software": + + Software is = (Software) input; + Optional + .ofNullable(is.getCodeRepositoryUrl()) + .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); + Optional + .ofNullable(is.getDocumentationUrl()) + .ifPresent( + value -> out + .setDocumentationUrl( + value + .stream() + .map(Field::getValue) + .collect(Collectors.toList()))); + + Optional + .ofNullable(is.getProgrammingLanguage()) + .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); + + out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); + break; + case "other": + + OtherResearchProduct ir = (OtherResearchProduct) input; + out + .setContactgroup( + Optional + .ofNullable(ir.getContactgroup()) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) + .orElse(null)); + + out + .setContactperson( + Optional + .ofNullable(ir.getContactperson()) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) + .orElse(null)); + out + .setTool( + Optional + .ofNullable(ir.getTool()) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) + .orElse(null)); + + out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); + + break; + default: + throw new NoAvailableEntityTypeException(); + } + } + private static Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { Instance instance = new Instance(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 1eff99892..a6595d756 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** @@ -86,10 +87,13 @@ public class SaveCommunityMap implements Serializable { private void saveCommunityMap(boolean singleCommunity, String communityId) throws ISLookUpException, IOException, DocumentException, SAXException { + final String communityMapString = Utils.OBJECT_MAPPER + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); + log.info("communityMap {} ", communityMapString); writer .write( - Utils.OBJECT_MAPPER - .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId))); + communityMapString); + writer.close(); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json index 806220510..677715847 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json @@ -30,7 +30,7 @@ "paramLongName": "communityId", "paramDescription": "the id of the community for which to create the dump", "paramRequired": true - } } + } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml index e69de29bb..e5ec3d0ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file From 16b28494a940e5be42874ba5ef1e91778d6bd2bd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 13 Oct 2021 11:34:24 +0200 Subject: [PATCH 275/287] added new parameter in the doiboost process workflow to specify a folder for the process of MAG dataset --- .../dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index eb82c3a7d..282c06f4e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -48,7 +48,11 @@ inputPathMAG - the MAG working path + the MAG input path + + + workingPathMAG + the MAG working path where to store the intermediate process results @@ -138,7 +142,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathMAG}/dataset - --workingPath${inputPathMAG}/process_p + --workingPath${workingPathMAG} --targetPath${workingPath} --masteryarn-cluster From 5d9cc2452dc76452fbb45f326315ec5061f2feae Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 13 Oct 2021 15:33:50 +0200 Subject: [PATCH 276/287] changed the working path parameter value as dependant from the dnet-workflow working dir parameter --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 282c06f4e..29a12f4df 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -50,10 +50,6 @@ inputPathMAG the MAG input path - - workingPathMAG - the MAG working path where to store the intermediate process results - @@ -142,7 +138,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathMAG}/dataset - --workingPath${workingPathMAG} + --workingPath${workingPath}/MAG --targetPath${workingPath} --masteryarn-cluster From b292e4a700eed70b8a94542245610b88f53e9ac3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 13 Oct 2021 17:31:53 +0200 Subject: [PATCH 277/287] [stats wf] added extra logging in the context data retrieval phase --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh index 6d42ab13d..c60d67b1a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh @@ -11,9 +11,13 @@ TARGET_DB=$2 TMP=/tmp/stats-update-`tr -dc A-Za-z0-9 contexts.csv + +echo "Downloading categories data" cat contexts.csv | cut -d , -f1 | xargs -I {} curl -L ${CONTEXT_API}/context/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split(":")[0]),\(.id),\(.label)"' > categories.csv + +echo "Downloading concepts data" cat categories.csv | cut -d , -f2 | sed 's/:/%3A/g'| xargs -I {} curl -L ${CONTEXT_API}/context/category/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split("::")[0])::\(.id|split("::")[1]),\(.id),\(.label)"' > concepts.csv cat contexts.csv | sed 's/^\(.*\),\(.*\)/\1,\1::other,\2/' >> categories.csv cat categories.csv | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv From 51a03c0a50bd1a546865892698f840a97479b16e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 14 Oct 2021 14:23:05 +0200 Subject: [PATCH 278/287] refactor code for EBI from dhp-graph-mapper into dhp-aggregation --- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 4 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 4 +- .../sx/bio}/ebi/oozie_app/config-default.xml | 0 .../dhp/sx/bio/ebi/oozie_app/workflow.xml | 105 ++++++++++++++++++ .../dhp/sx/graph/ebi/oozie_app/workflow.xml | 99 ----------------- .../ebi/update/oozie_app/config-default.xml | 68 ------------ .../graph/ebi/update/oozie_app/workflow.xml | 67 ----------- 7 files changed, 109 insertions(+), 238 deletions(-) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio}/ebi/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index 578db1ea9..85fbd99c4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -66,7 +66,7 @@ object SparkDownloadEBILinks { val log: Logger = LoggerFactory.getLogger(getClass) val MAX_ITEM_PER_PARTITION = 20000 val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_download_update.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -86,7 +86,7 @@ object SparkDownloadEBILinks { val workingPath = parser.get("workingPath") log.info(s"workingPath -> $workingPath") - log.info("Getting max pubmedId where the links have been requested") + log.info("Getting max pubmedId where the links have already requested") val links: Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] val lastPMIDRequested = links.map(l => l.id).select(max("value")).first.getLong(0) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 0db469769..10467884c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -14,7 +14,7 @@ object SparkEBILinksToOaf { def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/ebi_to_df_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/ebi_to_df_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -31,7 +31,7 @@ object SparkEBILinksToOaf { log.info(s"targetPath -> $targetPath") implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val ebLinks: Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links != null) + val ebLinks: Dataset[EBILinkItem] = spark.read.load(sourcePath).as[EBILinkItem].filter(l => l.links != null && l.links.startsWith("{")) ebLinks.flatMap(j => BioDBToOAF.parse_ebi_links(j.links)) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml new file mode 100644 index 000000000..73b1a3b60 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/oozie_app/workflow.xml @@ -0,0 +1,105 @@ + + + + sourcePath + the Working Path + + + workingPath + the Working Path + + + targetPath + the OAF MDStore Path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + resumeFrom + DownloadEBILinks + node to start + + + + + + + + ${wf:conf('resumeFrom') eq 'DownloadEBILinks'} + ${wf:conf('resumeFrom') eq 'CreateEBIDataSet'} + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + Incremental Download EBI Links + eu.dnetllib.dhp.sx.bio.ebi.SparkDownloadEBILinks + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingPath} + --masteryarn + + + + + + + + + + + + + + + + yarn-cluster + cluster + Create OAF DataSet + eu.dnetllib.dhp.sx.bio.ebi.SparkEBILinksToOaf + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=2000 + ${sparkExtraOPT} + + --sourcePath${sourcePath}/ebi_links_dataset + --targetPath${targetPath} + --masteryarn + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml deleted file mode 100644 index 7612321c0..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - - workingPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn-cluster - cluster - Create Baselnie DataSet - - eu.dnetlib.dhp.sx.ebi.SparkCreateBaselineDataFrame - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=1 - --driver-memory=${sparkDriverMemory} - --executor-cores=${sparkExecutorCores} - ${sparkExtraOPT} - - --workingPath${workingPath} - --masteryarn - --hdfsServerUri${nameNode} - - - - - - - - yarn-cluster - cluster - Create EBI DataSet - - eu.dnetlib.dhp.sx.ebi.SparkEBILinksToOaf - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=1000 - ${sparkExtraOPT} - - --workingPath${workingPath} - --masteryarn - - - - - - - - yarn-cluster - cluster - Create Baseline DataSet - - eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=1 - --driver-memory=${sparkDriverMemory} - --executor-cores=${sparkExecutorCores} - ${sparkExtraOPT} - - --workingPath${workingPath} - --masteryarn - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml deleted file mode 100644 index 17cd6c9a3..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - - - - - - - - - - - - - - - - - - - - - - - oozie.launcher.mapreduce.user.classpath.first - true - - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml deleted file mode 100644 index cd3bb8c71..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - sourcePath - the Working Path - - - workingPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - yarn-cluster - cluster - Incremental Download EBI Links - eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --workingPath${workingPath} - --masteryarn - - - - - - - - - - - - - - - \ No newline at end of file From 7b15b88d4cdc60daf7cc7e60badea8f844abb57e Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 15 Oct 2021 15:00:15 +0200 Subject: [PATCH 279/287] renamed wrong package, implemented last aggregation workflow for scholexplorer --- .../dhp/sx/bio/BioDBToOAF.scala | 2 +- .../bio/SparkTransformBioDatabaseToOAF.scala | 6 +- .../ebi/SparkCreateBaselineDataFrame.scala | 4 +- .../sx/bio/ebi/SparkDownloadEBILinks.scala | 7 +- .../dhp/sx/bio/ebi/SparkEBILinksToOaf.scala | 7 +- .../dhp/sx/bio/pubmed/PMArticle.java | 2 +- .../dhp/sx/bio/pubmed/PMAuthor.java | 2 +- .../dhp/sx/bio/pubmed/PMGrant.java | 2 +- .../dhp/sx/bio/pubmed/PMJournal.java | 2 +- .../dhp/sx/bio/pubmed/PMParser.scala | 2 +- .../dhp/sx/bio/pubmed/PMSubject.java | 2 +- .../dhp/sx/bio/pubmed/PubMedToOaf.scala | 2 +- .../sx/bio/db}/oozie_app/config-default.xml | 0 .../dhp/sx/bio/db/oozie_app/workflow.xml | 51 +++++ .../dhp/sx/bio/ebi}/bio_to_oaf_params.json | 0 .../dhp/sx/bio/BioScholixTest.scala | 6 +- .../dhp/sx/graph/bio/oozie_app/workflow.xml | 177 ------------------ 17 files changed, 75 insertions(+), 199 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/BioDBToOAF.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala (91%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala (98%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala (95%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala (90%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PMArticle.java (97%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PMAuthor.java (93%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PMGrant.java (94%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PMJournal.java (95%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PMParser.scala (99%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PMSubject.java (94%) rename dhp-workflows/dhp-aggregation/src/main/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/pubmed/PubMedToOaf.scala (99%) rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db}/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml rename dhp-workflows/{dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio => dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi}/bio_to_oaf_params.json (100%) rename dhp-workflows/dhp-aggregation/src/test/java/eu/{dnetllib => dnetlib}/dhp/sx/bio/BioScholixTest.scala (97%) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/BioDBToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/BioDBToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala index dffc88c6c..70dcc0184 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/BioDBToOAF.scala @@ -1,4 +1,4 @@ -package eu.dnetllib.dhp.sx.bio +package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala similarity index 91% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala index 16d2b25a6..7a62437a3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/SparkTransformBioDatabaseToOAF.scala @@ -1,8 +1,8 @@ -package eu.dnetllib.dhp.sx.bio +package eu.dnetlib.dhp.sx.bio import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetllib.dhp.sx.bio.BioDBToOAF.ScholixResolved +import BioDBToOAF.ScholixResolved import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -13,7 +13,7 @@ object SparkTransformBioDatabaseToOAF { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val log: Logger = LoggerFactory.getLogger(getClass) - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/bio_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json"))) parser.parseArgument(args) val database: String = parser.get("database") log.info("database: {}", database) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 17bf3fa6b..17d21f19c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -1,10 +1,10 @@ -package eu.dnetllib.dhp.sx.bio.ebi +package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.oaf.Result +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory -import eu.dnetllib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala index 85fbd99c4..eab6b1dc6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkDownloadEBILinks.scala @@ -1,8 +1,9 @@ -package eu.dnetllib.dhp.sx.bio.ebi +package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetllib.dhp.sx.bio.BioDBToOAF.EBILinkItem -import eu.dnetllib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpGet diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala similarity index 90% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala index 10467884c..b19bfc23a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/ebi/SparkEBILinksToOaf.scala @@ -1,9 +1,10 @@ -package eu.dnetllib.dhp.sx.bio.ebi +package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.Oaf -import eu.dnetllib.dhp.sx.bio.BioDBToOAF -import eu.dnetllib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.bio.BioDBToOAF +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem +import BioDBToOAF.EBILinkItem import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql._ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMArticle.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMArticle.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java index 305bb89be..881528425 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMArticle.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java @@ -1,5 +1,5 @@ -package eu.dnetllib.dhp.sx.bio.pubmed; +package eu.dnetlib.dhp.sx.bio.pubmed; import java.io.Serializable; import java.util.ArrayList; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMAuthor.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java index c89929981..cef92d003 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMAuthor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java @@ -1,5 +1,5 @@ -package eu.dnetllib.dhp.sx.bio.pubmed; +package eu.dnetlib.dhp.sx.bio.pubmed; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMGrant.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java similarity index 94% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMGrant.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java index 7df5dd5f2..ce9420cc1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMGrant.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java @@ -1,5 +1,5 @@ -package eu.dnetllib.dhp.sx.bio.pubmed; +package eu.dnetlib.dhp.sx.bio.pubmed; public class PMGrant { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMJournal.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java similarity index 95% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMJournal.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java index 6065416f8..863a23bd5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMJournal.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java @@ -1,5 +1,5 @@ -package eu.dnetllib.dhp.sx.bio.pubmed; +package eu.dnetlib.dhp.sx.bio.pubmed; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMParser.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala index 8fa226b7d..80cb0667c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMParser.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala @@ -1,4 +1,4 @@ -package eu.dnetllib.dhp.sx.bio.pubmed +package eu.dnetlib.dhp.sx.bio.pubmed import scala.xml.MetaData import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMSubject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java similarity index 94% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMSubject.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java index e6ab61b87..862d39a94 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PMSubject.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java @@ -1,5 +1,5 @@ -package eu.dnetllib.dhp.sx.bio.pubmed; +package eu.dnetlib.dhp.sx.bio.pubmed; public class PMSubject { private String value; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala similarity index 99% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PubMedToOaf.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index a1777a230..13f38408e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetllib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -1,4 +1,4 @@ -package eu.dnetllib.dhp.sx.bio.pubmed +package eu.dnetlib.dhp.sx.bio.pubmed import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml new file mode 100644 index 000000000..071d202b6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/db/oozie_app/workflow.xml @@ -0,0 +1,51 @@ + + + + sourcePath + the PDB Database Working Path + + + database + the PDB Database Working Path + + + + targetPath + the Target Working dir path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Convert Bio DB to OAF Dataset + eu.dnetlib.dhp.sx.bio.SparkTransformBioDatabaseToOAF + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --dbPath${sourcePath} + --database${database} + --targetPath${targetPath} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/bio_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/bio_to_oaf_params.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/bio_to_oaf_params.json diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala similarity index 97% rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio/BioScholixTest.scala rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index c072f149c..893a6e628 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetllib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -1,10 +1,10 @@ -package eu.dnetllib.dhp.sx.bio +package eu.dnetlib.dhp.sx.bio import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import eu.dnetllib.dhp.sx.bio.BioDBToOAF.ScholixResolved -import eu.dnetllib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf} +import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved +import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf} import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/oozie_app/workflow.xml deleted file mode 100644 index 0df085ee1..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/bio/oozie_app/workflow.xml +++ /dev/null @@ -1,177 +0,0 @@ - - - - PDBPath - the PDB Database Working Path - - - - UNIPROTDBPath - the UNIPROT Database Working Path - - - - EBIDataset - the EBI Links Dataset Path - - - - ScholixResolvedDBPath - the Scholix Resolved Dataset Path - - - - CrossrefLinksPath - the CrossrefLinks Path - - - targetPath - the Target Working dir path - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn - cluster - Convert PDB to OAF Dataset - eu.dnetlib.dhp.sx.graph.bio.SparkTransformBioDatabaseToOAF - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --dbPath${PDBPath} - --databasePDB - --targetPath${targetPath}/pdb_OAF - - - - - - - - - yarn - cluster - Convert UNIPROT to OAF Dataset - eu.dnetlib.dhp.sx.graph.bio.SparkTransformBioDatabaseToOAF - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --dbPath${UNIPROTDBPath} - --databaseUNIPROT - --targetPath${targetPath}/uniprot_OAF - - - - - - - - - yarn - cluster - Convert EBI Links to OAF Dataset - eu.dnetlib.dhp.sx.graph.ebi.SparkEBILinksToOaf - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --sourcePath${EBIDataset} - --targetPath${targetPath}/ebi_OAF - - - - - - - - - yarn - cluster - Convert Scholix to OAF Dataset - eu.dnetlib.dhp.sx.graph.bio.SparkTransformBioDatabaseToOAF - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --dbPath${ScholixResolvedDBPath} - --databaseSCHOLIX - --targetPath${targetPath}/scholix_resolved_OAF - - - - - - - - - yarn - cluster - Convert Crossref Links to OAF Dataset - eu.dnetlib.dhp.sx.graph.bio.SparkTransformBioDatabaseToOAF - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --dbPath${CrossrefLinksPath} - --databaseCROSSREF_LINKS - --targetPath${targetPath}/crossref_unresolved_relation_OAF - - - - - - - - - - - \ No newline at end of file From 46f82c7c8fecadd36754e0caf04fa2ef09c889ee Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 18 Oct 2021 10:53:16 +0200 Subject: [PATCH 280/287] removed not needed folder deletion --- .../eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index ab3b9593e..40a17b486 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -107,7 +107,6 @@ - From a894d7adf3dd104f56bdfbbe8e4797813a9f168a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 19 Oct 2021 10:02:55 +0200 Subject: [PATCH 281/287] updated version of dhp-schemas --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3e8d6bc19..02bc5d8d4 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.8.20] + [2.8.21] [4.0.3] [6.0.5] [3.1.6] From c7f6cd2591ddc7b6e9f6a1ca4eb29c62a73869f5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 19 Oct 2021 10:15:26 +0200 Subject: [PATCH 282/287] added again the setting for saXReader --- .../eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index ab5434ed0..b972de6e9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -62,7 +62,7 @@ public class QueryInformationSystem { for (String xml : communityMap) { final Document doc; final SAXReader reader = new SAXReader(); - // reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); From 7a73010acdd93ca7f7d470c8f732315b54482acc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 19 Oct 2021 11:59:16 +0200 Subject: [PATCH 283/287] WIP: worflow nodes for including Scholexplorer records in the RAW graph --- .../oa/graph/raw/CopyHdfsOafApplication.java | 143 ++++++++++++++++++ .../raw/MigrateHdfsMdstoresApplication.java | 26 ---- .../common/AbstractMigrationApplication.java | 47 ++++++ .../oa/graph/copy_hdfs_oaf_parameters.json | 38 +++++ .../oa/graph/raw_all/oozie_app/workflow.xml | 27 ++++ 5 files changed, 255 insertions(+), 26 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java new file mode 100644 index 000000000..2e0611475 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -0,0 +1,143 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import com.clearspring.analytics.util.Lists; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class CopyHdfsOafApplication extends AbstractMigrationApplication { + + private static final Logger log = LoggerFactory.getLogger(CopyHdfsOafApplication.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + CopyHdfsOafApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json"))); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String mdstoreManagerUrl = parser.get("mdstoreManagerUrl"); + log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl); + + final String mdFormat = parser.get("mdFormat"); + log.info("mdFormat: {}", mdFormat); + + final String mdLayout = parser.get("mdLayout"); + log.info("mdLayout: {}", mdLayout); + + final String mdInterpretation = parser.get("mdInterpretation"); + log.info("mdInterpretation: {}", mdInterpretation); + + final String hdfsPath = parser.get("hdfsPath"); + log.info("hdfsPath: {}", hdfsPath); + + final String isLookupUrl = parser.get("isLookupUrl"); + log.info("isLookupUrl: {}", isLookupUrl); + + final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); + final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService); + + final Set paths = mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation); + + final SparkConf conf = new SparkConf(); + runWithSparkSession(conf, isSparkSessionManaged, spark -> processPaths(spark, vocs, hdfsPath, paths)); + } + + public static void processPaths(final SparkSession spark, + final VocabularyGroup vocs, + final String outputPath, + final Set paths) { + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + log.info("Found {} mdstores", paths.size()); + paths.forEach(log::info); + + final String[] validPaths = paths + .stream() + .filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration())) + .toArray(String[]::new); + log.info("Non empty mdstores {}", validPaths.length); + + if (validPaths.length > 0) { + // load the dataset + Dataset oaf = spark + .read() + .load(validPaths) + .as(Encoders.kryo(Oaf.class)); + + // dispatch each entity type individually in the respective graph subdirectory in append mode + for(Map.Entry e : ModelSupport.entityTypes.entrySet()) { + oaf + .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey().toString())) + .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.bean(e.getValue())) + .write() + .option("compression", "gzip") + .mode(SaveMode.Append) + .text(outputPath + "/" + e.getKey()); + } + + oaf + .flatMap((FlatMapFunction) o -> { + Relation rel = (Relation) o; + List rels = Lists.newArrayList(); + rels.add(getInverse(rel, vocs)); + + return rels.iterator(); + }, Encoders.bean(Relation.class)); + } + } + + private static Relation getInverse(Relation rel, VocabularyGroup vocs) { + final Relation inverse = new Relation(); + + inverse.setProperties(rel.getProperties()); + inverse.setValidated(rel.getValidated()); + inverse.setValidationDate(rel.getValidationDate()); + inverse.setCollectedfrom(rel.getCollectedfrom()); + inverse.setDataInfo(rel.getDataInfo()); + inverse.setLastupdatetimestamp(rel.getLastupdatetimestamp()); + + inverse.setSource(rel.getTarget()); + inverse.setTarget(rel.getSource()); + inverse.setRelType(rel.getRelType()); + inverse.setSubRelType(rel.getSubRelType()); + + return inverse; + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java index 4110bd806..6c72e4dfc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java @@ -135,30 +135,4 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication } } - private static Set mdstorePaths(final String mdstoreManagerUrl, - final String format, - final String layout, - final String interpretation) throws IOException { - final String url = mdstoreManagerUrl + "/mdstores/"; - final ObjectMapper objectMapper = new ObjectMapper(); - - final HttpGet req = new HttpGet(url); - - try (final CloseableHttpClient client = HttpClients.createDefault()) { - try (final CloseableHttpResponse response = client.execute(req)) { - final String json = IOUtils.toString(response.getEntity().getContent()); - final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class); - return Arrays - .stream(mdstores) - .filter(md -> md.getFormat().equalsIgnoreCase(format)) - .filter(md -> md.getLayout().equalsIgnoreCase(layout)) - .filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation)) - .filter(md -> StringUtils.isNotBlank(md.getHdfsPath())) - .filter(md -> StringUtils.isNotBlank(md.getCurrentVersion())) - .filter(md -> md.getSize() > 0) - .map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store") - .collect(Collectors.toSet()); - } - } - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index 5d32fe926..7c88dbd9d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -3,8 +3,14 @@ package eu.dnetlib.dhp.oa.graph.raw.common; import java.io.Closeable; import java.io.IOException; +import java.util.Arrays; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -16,6 +22,10 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Oaf; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; public class AbstractMigrationApplication implements Closeable { @@ -47,6 +57,43 @@ public class AbstractMigrationApplication implements Closeable { SequenceFile.Writer.valueClass(Text.class)); } + /** + * Retrieves from the metadata store manager application the list of paths associated with mdstores characterized + * by he given format, layout, interpretation + * @param mdstoreManagerUrl the URL of the mdstore manager service + * @param format the mdstore format + * @param layout the mdstore layout + * @param interpretation the mdstore interpretation + * @return the set of hdfs paths + * @throws IOException in case of HTTP communication issues + */ + protected static Set mdstorePaths(final String mdstoreManagerUrl, + final String format, + final String layout, + final String interpretation) throws IOException { + final String url = mdstoreManagerUrl + "/mdstores/"; + final ObjectMapper objectMapper = new ObjectMapper(); + + final HttpGet req = new HttpGet(url); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + try (final CloseableHttpResponse response = client.execute(req)) { + final String json = IOUtils.toString(response.getEntity().getContent()); + final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class); + return Arrays + .stream(mdstores) + .filter(md -> md.getFormat().equalsIgnoreCase(format)) + .filter(md -> md.getLayout().equalsIgnoreCase(layout)) + .filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation)) + .filter(md -> StringUtils.isNotBlank(md.getHdfsPath())) + .filter(md -> StringUtils.isNotBlank(md.getCurrentVersion())) + .filter(md -> md.getSize() > 0) + .map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store") + .collect(Collectors.toSet()); + } + } + } + private Configuration getConf() { return new Configuration(); /* diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json new file mode 100644 index 000000000..1e862198f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/copy_hdfs_oaf_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName": "p", + "paramLongName": "hdfsPath", + "paramDescription": "the path where storing the sequential file", + "paramRequired": true + }, + { + "paramName": "u", + "paramLongName": "mdstoreManagerUrl", + "paramDescription": "the MdstoreManager url", + "paramRequired": true + }, + { + "paramName": "f", + "paramLongName": "mdFormat", + "paramDescription": "metadata format", + "paramRequired": true + }, + { + "paramName": "l", + "paramLongName": "mdLayout", + "paramDescription": "metadata layout", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "mdInterpretation", + "paramDescription": "metadata interpretation", + "paramRequired": true + }, + { + "paramName": "isu", + "paramLongName": "isLookupUrl", + "paramDescription": "the url of the ISLookupService", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 321ca4090..563923a5a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -544,6 +544,33 @@ --sourcePath${workingDir}/entities --graphRawPath${workingDir}/graph_raw + + + + + + + yarn + cluster + ImportOAF_hdfs_graph + eu.dnetlib.dhp.oa.graph.raw.CopyHdfsOafApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --hdfsPath${workingDir}/graph_raw + --mdstoreManagerUrl${mdstoreManagerUrl} + --mdFormatOAF + --mdLayoutstore + --mdInterpretationgraph + --isLookupUrl${isLookupUrl} + From f8329bc11052f327336089b70a85ccbe90bca511 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 19 Oct 2021 15:24:22 +0200 Subject: [PATCH 284/287] since dhp-schemas changed, introducing new Relation inverse model, this class has been updated --- .../java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 38ffd28fe..7d0d6b0b8 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -91,8 +91,8 @@ public class ReadBlacklistFromDB implements Closeable { String encoding = rs.getString("relationship"); RelationInverse ri = ModelSupport.relationInverseMap.get(encoding); - direct.setRelClass(ri.getRelation()); - inverse.setRelClass(ri.getInverse()); + direct.setRelClass(ri.getRelClass()); + inverse.setRelClass(ri.getInverseRelClass()); direct.setRelType(ri.getRelType()); inverse.setRelType(ri.getRelType()); direct.setSubRelType(ri.getSubReltype()); From c9870c5122d2d59ece2a4500dc2e75490c176202 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 19 Oct 2021 15:24:59 +0200 Subject: [PATCH 285/287] code formatted --- .../oa/provision/SolrConfigExploreTest.java | 154 +++++++++--------- .../dhp/oa/provision/SolrConfigTest.java | 150 ++++++++--------- .../dhp/oa/provision/SolrExploreTest.java | 128 +++++++-------- 3 files changed, 216 insertions(+), 216 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java index 9bc2924c3..3beca7e7e 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java @@ -25,102 +25,102 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class SolrConfigExploreTest extends SolrExploreTest { - protected static SparkSession spark; + protected static SparkSession spark; - private static final Integer batchSize = 100; + private static final Integer batchSize = 100; - @Mock - private ISLookUpService isLookUpService; + @Mock + private ISLookUpService isLookUpService; - @Mock - private ISLookupClient isLookupClient; + @Mock + private ISLookupClient isLookupClient; - @BeforeEach - public void prepareMocks() throws ISLookUpException, IOException { - isLookupClient.setIsLookup(isLookUpService); + @BeforeEach + public void prepareMocks() throws ISLookUpException, IOException { + isLookupClient.setIsLookup(isLookUpService); - int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); + int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); - Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); - Mockito - .when(isLookupClient.getLayoutSource(Mockito.anyString())) - .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); - Mockito - .when(isLookupClient.getLayoutTransformer()) - .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); - } + Mockito + .when(isLookupClient.getDsId(Mockito.anyString())) + .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); + Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); + Mockito + .when(isLookupClient.getLayoutSource(Mockito.anyString())) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); + Mockito + .when(isLookupClient.getLayoutTransformer()) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); + } - @BeforeAll - public static void before() { + @BeforeAll + public static void before() { - SparkConf conf = new SparkConf(); - conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); - conf.registerKryoClasses(new Class[] { - SerializableSolrInputDocument.class - }); + SparkConf conf = new SparkConf(); + conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); + conf.registerKryoClasses(new Class[] { + SerializableSolrInputDocument.class + }); - conf.setMaster("local[1]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); + conf.setMaster("local[1]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); - spark = SparkSession - .builder() - .appName(XmlIndexingJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); + spark = SparkSession + .builder() + .appName(XmlIndexingJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - } + @AfterAll + public static void tearDown() { + spark.stop(); + } - @AfterAll - public static void tearDown() { - spark.stop(); - } + @Test + public void testSolrConfig() throws Exception { - @Test - public void testSolrConfig() throws Exception { + String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; - String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; + new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null) + .run(isLookupClient); + Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null).run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + String[] queryStrings = { + "cancer", + "graph", + "graphs" + }; - String[] queryStrings = { - "cancer", - "graph", - "graphs" - }; + for (String q : queryStrings) { + SolrQuery query = new SolrQuery(); + query.setRequestHandler("/exploreSearch"); + query.add(CommonParams.Q, q); + query.set("debugQuery", "on"); - for (String q : queryStrings) { - SolrQuery query = new SolrQuery(); - query.setRequestHandler("/exploreSearch"); - query.add(CommonParams.Q, q); - query.set("debugQuery", "on"); - - log.info("Submit query to Solr with params: {}", query.toString()); - QueryResponse rsp = miniCluster.getSolrClient().query(query); + log.info("Submit query to Solr with params: {}", query.toString()); + QueryResponse rsp = miniCluster.getSolrClient().query(query); // System.out.println(rsp.getHighlighting()); // System.out.println(rsp.getExplainMap()); - for (SolrDocument doc : rsp.getResults()) { - System.out.println( - doc.get("score") + "\t" + - doc.get("__indexrecordidentifier") + "\t" + - doc.get("resultidentifier") + "\t" + - doc.get("resultauthor") + "\t" + - doc.get("resultacceptanceyear") + "\t" + - doc.get("resultsubject") + "\t" + - doc.get("resulttitle") + "\t" + - doc.get("relprojectname") + "\t" + - doc.get("resultdescription") + "\t" + - doc.get("__all") + "\t" - ); - } - } - } + for (SolrDocument doc : rsp.getResults()) { + System.out + .println( + doc.get("score") + "\t" + + doc.get("__indexrecordidentifier") + "\t" + + doc.get("resultidentifier") + "\t" + + doc.get("resultauthor") + "\t" + + doc.get("resultacceptanceyear") + "\t" + + doc.get("resultsubject") + "\t" + + doc.get("resulttitle") + "\t" + + doc.get("relprojectname") + "\t" + + doc.get("resultdescription") + "\t" + + doc.get("__all") + "\t"); + } + } + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java index e20ecf152..ab98b1da2 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java @@ -34,98 +34,98 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class SolrConfigTest extends SolrTest { - protected static SparkSession spark; + protected static SparkSession spark; - private static final Integer batchSize = 100; + private static final Integer batchSize = 100; - @Mock - private ISLookUpService isLookUpService; + @Mock + private ISLookUpService isLookUpService; - @Mock - private ISLookupClient isLookupClient; + @Mock + private ISLookupClient isLookupClient; - @BeforeEach - public void prepareMocks() throws ISLookUpException, IOException { - isLookupClient.setIsLookup(isLookUpService); + @BeforeEach + public void prepareMocks() throws ISLookUpException, IOException { + isLookupClient.setIsLookup(isLookUpService); - int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); + int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); - Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); - Mockito - .when(isLookupClient.getLayoutSource(Mockito.anyString())) - .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); - Mockito - .when(isLookupClient.getLayoutTransformer()) - .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); - } + Mockito + .when(isLookupClient.getDsId(Mockito.anyString())) + .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); + Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); + Mockito + .when(isLookupClient.getLayoutSource(Mockito.anyString())) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); + Mockito + .when(isLookupClient.getLayoutTransformer()) + .thenReturn(IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); + } - @BeforeAll - public static void before() { + @BeforeAll + public static void before() { - SparkConf conf = new SparkConf(); - conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); - conf.registerKryoClasses(new Class[] { - SerializableSolrInputDocument.class - }); + SparkConf conf = new SparkConf(); + conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); + conf.registerKryoClasses(new Class[] { + SerializableSolrInputDocument.class + }); - conf.setMaster("local[1]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); + conf.setMaster("local[1]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); - spark = SparkSession - .builder() - .appName(XmlIndexingJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); + spark = SparkSession + .builder() + .appName(XmlIndexingJobTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - } + @AfterAll + public static void tearDown() { + spark.stop(); + } - @AfterAll - public static void tearDown() { - spark.stop(); - } + @Test + public void testSolrConfig() throws Exception { - @Test - public void testSolrConfig() throws Exception { + String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; - String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; + new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null) + .run(isLookupClient); + Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize, XmlIndexingJob.OutputFormat.SOLR, null).run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + String[] queryStrings = { + "cancer", + "graph", + "graphs" + }; - String[] queryStrings = { - "cancer", - "graph", - "graphs" - }; + for (String q : queryStrings) { + SolrQuery query = new SolrQuery(); + query.add(CommonParams.Q, q); - for (String q : queryStrings) { - SolrQuery query = new SolrQuery(); - query.add(CommonParams.Q, q); + log.info("Submit query to Solr with params: {}", query.toString()); + QueryResponse rsp = miniCluster.getSolrClient().query(query); - log.info("Submit query to Solr with params: {}", query.toString()); - QueryResponse rsp = miniCluster.getSolrClient().query(query); - - for (SolrDocument doc : rsp.getResults()) { - System.out.println( - doc.get("score") + "\t" + - doc.get("__indexrecordidentifier") + "\t" + - doc.get("resultidentifier") + "\t" + - doc.get("resultauthor") + "\t" + - doc.get("resultacceptanceyear") + "\t" + - doc.get("resultsubject") + "\t" + - doc.get("resulttitle") + "\t" + - doc.get("relprojectname") + "\t" + - doc.get("resultdescription") + "\t" + - doc.get("__all") + "\t" - ); - } - } - } + for (SolrDocument doc : rsp.getResults()) { + System.out + .println( + doc.get("score") + "\t" + + doc.get("__indexrecordidentifier") + "\t" + + doc.get("resultidentifier") + "\t" + + doc.get("resultauthor") + "\t" + + doc.get("resultacceptanceyear") + "\t" + + doc.get("resultsubject") + "\t" + + doc.get("resulttitle") + "\t" + + doc.get("relprojectname") + "\t" + + doc.get("resultdescription") + "\t" + + doc.get("__all") + "\t"); + } + } + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java index b86fd8ac8..34a9465a7 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java @@ -23,87 +23,87 @@ import org.slf4j.LoggerFactory; public abstract class SolrExploreTest { - protected static final Logger log = LoggerFactory.getLogger(SolrTest.class); + protected static final Logger log = LoggerFactory.getLogger(SolrTest.class); - protected static final String FORMAT = "test"; - protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire"; - protected static final String CONFIG_NAME = "testConfig"; + protected static final String FORMAT = "test"; + protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire"; + protected static final String CONFIG_NAME = "testConfig"; - protected static MiniSolrCloudCluster miniCluster; + protected static MiniSolrCloudCluster miniCluster; - @TempDir - public static Path workingDir; + @TempDir + public static Path workingDir; - @BeforeAll - public static void setup() throws Exception { + @BeforeAll + public static void setup() throws Exception { - // random unassigned HTTP port - final int jettyPort = 0; - final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); + // random unassigned HTTP port + final int jettyPort = 0; + final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); - log.info(String.format("working directory: %s", workingDir.toString())); - System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); + log.info(String.format("working directory: %s", workingDir.toString())); + System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); - // create a MiniSolrCloudCluster instance - miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); + // create a MiniSolrCloudCluster instance + miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); - // Upload Solr configuration directory to ZooKeeper - String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; - File configDir = new File(solrZKConfigDir); + // Upload Solr configuration directory to ZooKeeper + String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; + File configDir = new File(solrZKConfigDir); - miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); + miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); - // override settings in the solrconfig include - System.setProperty("solr.tests.maxBufferedDocs", "100000"); - System.setProperty("solr.tests.maxIndexingThreads", "-1"); - System.setProperty("solr.tests.ramBufferSizeMB", "100"); + // override settings in the solrconfig include + System.setProperty("solr.tests.maxBufferedDocs", "100000"); + System.setProperty("solr.tests.maxIndexingThreads", "-1"); + System.setProperty("solr.tests.ramBufferSizeMB", "100"); - // use non-test classes so RandomizedRunner isn't necessary - System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); - System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); - System.setProperty("solr.lock.type", "single"); + // use non-test classes so RandomizedRunner isn't necessary + System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); + System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); + System.setProperty("solr.lock.type", "single"); - log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); - log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); + log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); - NamedList res = createCollection( - miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME); - res.forEach(o -> log.info(o.toString())); + NamedList res = createCollection( + miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); - miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); + miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); - log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); - } + } - @AfterAll - public static void shutDown() throws Exception { - miniCluster.shutdown(); - FileUtils.deleteDirectory(workingDir.toFile()); - } + @AfterAll + public static void shutDown() throws Exception { + miniCluster.shutdown(); + FileUtils.deleteDirectory(workingDir.toFile()); + } - protected static NamedList createCollection(CloudSolrClient client, String name, int numShards, - int replicationFactor, int maxShardsPerNode, String configName) throws Exception { - ModifiableSolrParams modParams = new ModifiableSolrParams(); - modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name()); - modParams.set("name", name); - modParams.set("numShards", numShards); - modParams.set("replicationFactor", replicationFactor); - modParams.set("collection.configName", configName); - modParams.set("maxShardsPerNode", maxShardsPerNode); - QueryRequest request = new QueryRequest(modParams); - request.setPath("/admin/collections"); - return client.request(request); - } + protected static NamedList createCollection(CloudSolrClient client, String name, int numShards, + int replicationFactor, int maxShardsPerNode, String configName) throws Exception { + ModifiableSolrParams modParams = new ModifiableSolrParams(); + modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name()); + modParams.set("name", name); + modParams.set("numShards", numShards); + modParams.set("replicationFactor", replicationFactor); + modParams.set("collection.configName", configName); + modParams.set("maxShardsPerNode", maxShardsPerNode); + QueryRequest request = new QueryRequest(modParams); + request.setPath("/admin/collections"); + return client.request(request); + } } From 98f37c8d81222bce8cb4870850e65dcf52fc375a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 19 Oct 2021 16:14:40 +0200 Subject: [PATCH 286/287] WIP: worflow nodes for including Scholexplorer records in the RAW graph --- .../dhp/oa/graph/raw/CopyHdfsOafApplication.java | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index 2e0611475..c0accd25a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -101,24 +101,15 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { .as(Encoders.kryo(Oaf.class)); // dispatch each entity type individually in the respective graph subdirectory in append mode - for(Map.Entry e : ModelSupport.entityTypes.entrySet()) { + for(Map.Entry e : ModelSupport.oafTypes.entrySet()) { oaf - .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey().toString())) + .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey())) .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.bean(e.getValue())) .write() .option("compression", "gzip") .mode(SaveMode.Append) .text(outputPath + "/" + e.getKey()); } - - oaf - .flatMap((FlatMapFunction) o -> { - Relation rel = (Relation) o; - List rels = Lists.newArrayList(); - rels.add(getInverse(rel, vocs)); - - return rels.iterator(); - }, Encoders.bean(Relation.class)); } } From 512e7b01707690d4539fad1d444da92055678c5e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 19 Oct 2021 16:19:29 +0200 Subject: [PATCH 287/287] code formatting --- .../oa/graph/raw/CopyHdfsOafApplication.java | 38 ++++++++++--------- .../common/AbstractMigrationApplication.java | 36 +++++++++--------- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java index c0accd25a..4cff88d82 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/CopyHdfsOafApplication.java @@ -1,18 +1,10 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.clearspring.analytics.util.Lists; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.*; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -26,9 +18,19 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import com.clearspring.analytics.util.Lists; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class CopyHdfsOafApplication extends AbstractMigrationApplication { @@ -96,12 +98,12 @@ public class CopyHdfsOafApplication extends AbstractMigrationApplication { if (validPaths.length > 0) { // load the dataset Dataset oaf = spark - .read() - .load(validPaths) - .as(Encoders.kryo(Oaf.class)); + .read() + .load(validPaths) + .as(Encoders.kryo(Oaf.class)); // dispatch each entity type individually in the respective graph subdirectory in append mode - for(Map.Entry e : ModelSupport.oafTypes.entrySet()) { + for (Map.Entry e : ModelSupport.oafTypes.entrySet()) { oaf .filter((FilterFunction) o -> o.getClass().getSimpleName().toLowerCase().equals(e.getKey())) .map((MapFunction) OBJECT_MAPPER::writeValueAsString, Encoders.bean(e.getValue())) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index 7c88dbd9d..8cd495e08 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -8,7 +8,6 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; @@ -17,16 +16,17 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.oaf.Oaf; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; + public class AbstractMigrationApplication implements Closeable { private final AtomicInteger counter = new AtomicInteger(0); @@ -68,9 +68,9 @@ public class AbstractMigrationApplication implements Closeable { * @throws IOException in case of HTTP communication issues */ protected static Set mdstorePaths(final String mdstoreManagerUrl, - final String format, - final String layout, - final String interpretation) throws IOException { + final String format, + final String layout, + final String interpretation) throws IOException { final String url = mdstoreManagerUrl + "/mdstores/"; final ObjectMapper objectMapper = new ObjectMapper(); @@ -81,15 +81,15 @@ public class AbstractMigrationApplication implements Closeable { final String json = IOUtils.toString(response.getEntity().getContent()); final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class); return Arrays - .stream(mdstores) - .filter(md -> md.getFormat().equalsIgnoreCase(format)) - .filter(md -> md.getLayout().equalsIgnoreCase(layout)) - .filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation)) - .filter(md -> StringUtils.isNotBlank(md.getHdfsPath())) - .filter(md -> StringUtils.isNotBlank(md.getCurrentVersion())) - .filter(md -> md.getSize() > 0) - .map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store") - .collect(Collectors.toSet()); + .stream(mdstores) + .filter(md -> md.getFormat().equalsIgnoreCase(format)) + .filter(md -> md.getLayout().equalsIgnoreCase(layout)) + .filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation)) + .filter(md -> StringUtils.isNotBlank(md.getHdfsPath())) + .filter(md -> StringUtils.isNotBlank(md.getCurrentVersion())) + .filter(md -> md.getSize() > 0) + .map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store") + .collect(Collectors.toSet()); } } }