From fcbb4c148929c06756756f18a14cece1a5c232b3 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Wed, 24 Jun 2020 16:29:32 +0200 Subject: [PATCH 001/108] parser of orcid publication data from xml original dump --- dhp-workflows/dhp-doiboost/pom.xml | 6 + .../doiboost/orcid/model/AuthorData.java | 9 + .../orcidnodoi/model/Contributor.java | 54 +++ .../doiboost/orcidnodoi/model/ExternalId.java | 32 ++ .../orcidnodoi/model/PublicationDate.java | 32 ++ .../orcidnodoi/model/WorkDataNoDoi.java | 101 ++++++ .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 216 ++++++++++++ .../orcid/xml/XMLRecordParserTest.java | 2 +- .../orcidnodoi/xml/OrcidNoDoiTest.java | 326 ++++++++++++++++++ .../xml/activity_work_0000-0003-2760-1191.xml | 106 ++++++ .../xml/activity_work_0000-0002-5982-8983.xml | 0 ...ty_work_0000-0003-2760-1191-similarity.xml | 113 ++++++ .../xml/activity_work_0000-0003-2760-1191.xml | 106 ++++++ 13 files changed, 1102 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/activity_work_0000-0003-2760-1191.xml rename dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/{orcid => orcidnodoi}/xml/activity_work_0000-0002-5982-8983.xml (100%) create mode 100644 
dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191-similarity.xml create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 39bb81ec1..2662d0a39 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -84,6 +84,12 @@ spark-sql_2.11 + + org.apache.commons + commons-text + 1.8 + + diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java index 29551c347..87f1f65c8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java @@ -9,6 +9,7 @@ public class AuthorData implements Serializable { private String name; private String surname; private String creditName; + private String otherName; private String errorCode; public String getErrorCode() { @@ -50,4 +51,12 @@ public class AuthorData implements Serializable { public void setOid(String oid) { this.oid = oid; } + + public String getOtherName() { + return otherName; + } + + public void setOtherName(String otherName) { + this.otherName = otherName; + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java new file mode 100644 index 000000000..42076de5d --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java @@ -0,0 +1,54 @@ + +package eu.dnetlib.doiboost.orcidnodoi.model; + +import java.io.Serializable; + +import eu.dnetlib.doiboost.orcid.model.AuthorData; + +public class 
Contributor extends AuthorData implements Serializable { + private String sequence; + private String role; + private boolean simpleMatch = false; + private Double score = 0.0; + private boolean bestMatch = false; + + public String getSequence() { + return sequence; + } + + public void setSequence(String sequence) { + this.sequence = sequence; + } + + public String getRole() { + return role; + } + + public void setRole(String role) { + this.role = role; + } + + public boolean isSimpleMatch() { + return simpleMatch; + } + + public void setSimpleMatch(boolean simpleMatch) { + this.simpleMatch = simpleMatch; + } + + public Double getScore() { + return score; + } + + public void setScore(Double score) { + this.score = score; + } + + public boolean isBestMatch() { + return bestMatch; + } + + public void setBestMatch(boolean bestMatch) { + this.bestMatch = bestMatch; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java new file mode 100644 index 000000000..865e54ae3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java @@ -0,0 +1,32 @@ + +package eu.dnetlib.doiboost.orcidnodoi.model; + +public class ExternalId { + private String type; + private String value; + private String relationShip; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } + + public String getRelationShip() { + return relationShip; + } + + public void setRelationShip(String relationShip) { + this.relationShip = relationShip; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java new file mode 100644 index 000000000..9282a80ba --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java @@ -0,0 +1,32 @@ + +package eu.dnetlib.doiboost.orcidnodoi.model; + +public class PublicationDate { + private String year; + private String month; + private String day; + + public String getYear() { + return year; + } + + public void setYear(String year) { + this.year = year; + } + + public String getMonth() { + return month; + } + + public void setMonth(String month) { + this.month = month; + } + + public String getDay() { + return day; + } + + public void setDay(String day) { + this.day = day; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java new file mode 100644 index 000000000..ee13454e1 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java @@ -0,0 +1,101 @@ + +package eu.dnetlib.doiboost.orcidnodoi.model; + +import java.io.Serializable; +import java.util.List; + +public class WorkDataNoDoi implements Serializable { + + private String oid; + private String id; + private String sourceName; + private String type; + private List titles; + private List urls; + List extIds; + List publicationDates; + List contributors; + + public String getOid() { + return oid; + } + + public void setOid(String oid) { + this.oid = oid; + } + + public String getErrorCode() { + return errorCode; + } + + public void setErrorCode(String errorCode) { + this.errorCode = errorCode; + } + + private String errorCode; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public List getTitles() { + return titles; + } + + public void setTitles(List titles) 
{ + this.titles = titles; + } + + public String getSourceName() { + return sourceName; + } + + public void setSourceName(String sourceName) { + this.sourceName = sourceName; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public List getUrls() { + return urls; + } + + public void setUrls(List urls) { + this.urls = urls; + } + + public List getExtIds() { + return extIds; + } + + public void setExtIds(List extIds) { + this.extIds = extIds; + } + + public List getPublicationDates() { + return publicationDates; + } + + public void setPublicationDates(List publicationDates) { + this.publicationDates = publicationDates; + } + + public List getContributors() { + return contributors; + } + + public void setContributors(List contributors) { + this.contributors = contributors; + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java new file mode 100644 index 000000000..6e5771547 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -0,0 +1,216 @@ + +package eu.dnetlib.doiboost.orcidnodoi.xml; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.ximpleware.*; + +import eu.dnetlib.dhp.parser.utility.VtdException; +import eu.dnetlib.dhp.parser.utility.VtdUtilityParser; +import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; +import eu.dnetlib.doiboost.orcidnodoi.model.ExternalId; +import eu.dnetlib.doiboost.orcidnodoi.model.PublicationDate; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; + +public class XMLRecordParserNoDoi { + + private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); + + private static final String 
NS_COMMON_URL = "http://www.orcid.org/ns/common"; + private static final String NS_COMMON = "common"; + private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person"; + private static final String NS_PERSON = "person"; + private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details"; + private static final String NS_DETAILS = "personal-details"; + private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name"; + private static final String NS_OTHER = "other-name"; + private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; + private static final String NS_RECORD = "record"; + private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; + + private static final String NS_WORK = "work"; + private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; + + private static final String NS_ERROR = "error"; + + public static WorkDataNoDoi VTDParseWorkData(byte[] bytes) + throws VtdException, EncodingException, EOFException, EntityException, ParseException, XPathParseException, + NavException, XPathEvalException { + logger.info("parsing xml ..."); + final VTDGen vg = new VTDGen(); + vg.setDoc(bytes); + vg.parse(true); + final VTDNav vn = vg.getNav(); + final AutoPilot ap = new AutoPilot(vn); + ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL); + ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL); + ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); + + WorkDataNoDoi workData = new WorkDataNoDoi(); + final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); + if (!errors.isEmpty()) { + workData.setErrorCode(errors.get(0)); + return workData; + } + + List workNodes = VtdUtilityParser + .getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "put-code")); + if (!workNodes.isEmpty()) { + final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1]; + workData.setOid(oid); + final String id = 
(workNodes.get(0).getAttributes().get("put-code")); + workData.setId(id); + } else { + return null; + } + + final List titles = VtdUtilityParser + .getTextValue( + ap, vn, "//common:title"); + if (!titles.isEmpty()) { + workData.setTitles(titles); + } + + final List sourceNames = VtdUtilityParser + .getTextValue( + ap, vn, "//common:source-name"); + if (!sourceNames.isEmpty()) { + workData.setSourceName(sourceNames.get(0)); + } + + final List types = VtdUtilityParser + .getTextValue( + ap, vn, "//work:type"); + if (!types.isEmpty()) { + workData.setType(types.get(0)); + } + + final List urls = VtdUtilityParser + .getTextValue( + ap, vn, "//common:url"); + if (!urls.isEmpty()) { + workData.setUrls(urls); + } + + workData.setPublicationDates(getPublicationDates(vg, vn, ap)); + workData.setExtIds(getExternalIds(vg, vn, ap)); + workData.setContributors(getContributors(vg, vn, ap)); + return workData; + + } + + private static List getPublicationDates(VTDGen vg, VTDNav vn, AutoPilot ap) + throws XPathParseException, NavException, XPathEvalException { + List publicationDates = new ArrayList(); + int yearIndex = 0; + ap.selectXPath("//common:publication-date/common:year"); + while (ap.evalXPath() != -1) { + PublicationDate publicationDate = new PublicationDate(); + int t = vn.getText(); + if (t >= 0) { + publicationDate.setYear(vn.toNormalizedString(t)); + publicationDates.add(yearIndex, publicationDate); + yearIndex++; + } + } + int monthIndex = 0; + ap.selectXPath("//common:publication-date/common:month"); + while (ap.evalXPath() != -1) { + int t = vn.getText(); + if (t >= 0) { + publicationDates.get(monthIndex).setMonth(vn.toNormalizedString(t)); + monthIndex++; + } + } + int dayIndex = 0; + ap.selectXPath("//common:publication-date/common:day"); + while (ap.evalXPath() != -1) { + int t = vn.getText(); + if (t >= 0) { + publicationDates.get(dayIndex).setDay(vn.toNormalizedString(t)); + dayIndex++; + } + } + return publicationDates; + } + + private static List 
getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap) + throws XPathParseException, NavException, XPathEvalException { + List extIds = new ArrayList(); + int typeIndex = 0; + ap.selectXPath("//common:external-id/common:external-id-type"); + while (ap.evalXPath() != -1) { + ExternalId extId = new ExternalId(); + int t = vn.getText(); + if (t >= 0) { + extId.setType(vn.toNormalizedString(t)); + extIds.add(typeIndex, extId); + typeIndex++; + } + } + int valueIndex = 0; + ap.selectXPath("//common:external-id/common:external-id-value"); + while (ap.evalXPath() != -1) { + int t = vn.getText(); + if (t >= 0) { + extIds.get(valueIndex).setValue(vn.toNormalizedString(t)); + valueIndex++; + } + } + int relationshipIndex = 0; + ap.selectXPath("//common:external-id/common:external-id-relationship"); + while (ap.evalXPath() != -1) { + int t = vn.getText(); + if (t >= 0) { + extIds.get(relationshipIndex).setRelationShip(vn.toNormalizedString(t)); + relationshipIndex++; + } + } + if (typeIndex == valueIndex) { + return extIds; + } + return new ArrayList(); + } + + private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) + throws XPathParseException, NavException, XPathEvalException { + List contributors = new ArrayList(); + int nameIndex = 0; + ap.selectXPath("//work:contributor/work:credit-name"); + while (ap.evalXPath() != -1) { + Contributor contributor = new Contributor(); + int t = vn.getText(); + if (t >= 0) { + contributor.setCreditName(vn.toNormalizedString(t)); + contributors.add(nameIndex, contributor); + nameIndex++; + } + } + + int sequenceIndex = 0; + ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-sequence"); + while (ap.evalXPath() != -1) { + int t = vn.getText(); + if (t >= 0) { + contributors.get(sequenceIndex).setSequence(vn.toNormalizedString(t)); + sequenceIndex++; + } + } + + int roleIndex = 0; + ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-role"); + while (ap.evalXPath() != -1) { 
+ int t = vn.getText(); + if (t >= 0) { + contributors.get(roleIndex).setRole(vn.toNormalizedString(t)); + roleIndex++; + } + } + return contributors; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index d5da4eec0..4d8237f77 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -44,7 +44,7 @@ public class XMLRecordParserTest { String xml = IOUtils .toString( - this.getClass().getResourceAsStream("activity_work_0000-0002-5982-8983.xml")); + this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); XMLRecordParser p = new XMLRecordParser(); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java new file mode 100644 index 000000000..31f8432ac --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -0,0 +1,326 @@ + +package eu.dnetlib.doiboost.orcidnodoi.xml; + +import com.ximpleware.NavException; +import com.ximpleware.ParseException; +import com.ximpleware.XPathEvalException; +import com.ximpleware.XPathParseException; +import eu.dnetlib.dhp.parser.utility.VtdException; +import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import jdk.nashorn.internal.ir.annotations.Ignore; +import org.apache.commons.io.IOUtils; +import org.apache.commons.text.similarity.JaccardSimilarity; +import org.apache.commons.text.similarity.JaroWinklerSimilarity; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; 
+import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.text.Normalizer; +import java.util.*; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class OrcidNoDoiTest { + + private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class); + + String nameA = "Khairy"; + String surnameA = "Abdel Dayem"; + String otherNameA = "Dayem MKA"; + String nameB = "K"; + String surnameB = "Abdel-Dayem"; + String orcidIdA = "0000-0003-2760-1191"; + Double threshold = 0.8; + + @Test + @Ignore + private void similarityTest() throws Exception { + logger.info("running testSimilarity ...."); + logger + .info( + "JaroWinklerSimilarity: " + + Double.toString(similarityJaroWinkler(nameA, surnameA, nameB, surnameB))); + logger + .info( + "JaccardSimilarity: " + Double.toString(similarityJaccard(nameA, surnameA, nameB, surnameB))); + } + + @Test + @Ignore + private void bestMatchTest() throws Exception { + logger.info("running bestMatchTest ...."); + String contributor = surnameB + ", " + nameB; + logger.info("score: " + Double.toString(bestMatch(surnameA, nameA, contributor))); + } + + private static Double bestMatch(String authorSurname, String authorName, String contributor) { + logger.debug(authorSurname + " " + authorName + " vs " + contributor); + String[] contributorSplitted = contributor.split(" "); + if (contributorSplitted.length == 0) { + return 0.0; + } + final String contributorName = contributorSplitted[contributorSplitted.length - 1]; + String contributorSurname = ""; + if (contributorSplitted.length > 1) { + StringJoiner joiner = new StringJoiner(" "); + for (int i = 0; i < contributorSplitted.length - 1; i++) { + joiner.add(contributorSplitted[i]); + } + contributorSurname = joiner.toString(); + } + logger + .debug( + "contributorName: " + contributorName + + " contributorSurname: " + contributorSurname); + String authorNameNrm = normalize(authorName); + String authorSurnameNrm = normalize(authorSurname); + 
String contributorNameNrm = normalize(contributorName); + String contributorSurnameNrm = normalize(contributorSurname); + Double sm1 = similarity(authorNameNrm, authorSurnameNrm, contributorNameNrm, contributorSurnameNrm); + Double sm2 = similarity(authorNameNrm, authorSurnameNrm, contributorSurnameNrm, contributorNameNrm); + if (sm1.compareTo(sm2) >= 0) { + return sm1; + } + return sm2; + } + + private static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { + Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); + logger + .debug(nameA + ", " + surnameA + " <> " + nameB + ", " + surnameB + " score: " + Double.toString(score)); + return score; + } + + private static Double similarityJaccard(String nameA, String surnameA, String nameB, String surnameB) { + return new JaccardSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB))); + } + + private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { + return new JaroWinklerSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB))); + } + + private static String parse(String name, String surname) { + return surname + " " + name; + } + + private static String normalize(final String s) { + return nfd(s) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } + + @Test + @Ignore + public void readPublicationFieldsTest() + throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + logger.info("running loadPublicationFieldsTest 
...."); + String xml = IOUtils + .toString( + OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); + + if (xml == null) { + logger.info("Resource not found"); + } + XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); + if (p == null) { + logger.info("XMLRecordParserNoDoi null"); + } + WorkDataNoDoi workData = null; + try { + workData = p.VTDParseWorkData(xml.getBytes()); + } catch (Exception e) { + logger.error("parsing xml", e); + } + assertNotNull(workData); + assertNotNull(workData.getOid()); + logger.info("oid: " + workData.getOid()); + assertNotNull(workData.getTitles()); + logger.info("titles: "); + workData.getTitles().forEach(t -> { + logger.info(t); + }); + logger.info("source: " + workData.getSourceName()); + logger.info("type: " + workData.getType()); + logger.info("urls: "); + workData.getUrls().forEach(u -> { + logger.info(u); + }); + logger.info("publication date: "); + workData.getPublicationDates().forEach(d -> { + logger.info(d.getYear() + " - " + d.getMonth() + " - " + d.getDay()); + }); + logger.info("external id: "); + workData.getExtIds().removeIf(e -> e.getRelationShip() != null && !e.getRelationShip().equals("self")); + workData.getExtIds().forEach(e -> { + logger.info(e.getType() + " - " + e.getValue() + " - " + e.getRelationShip()); + }); + logger.info("contributors: "); + workData.getContributors().forEach(c -> { + logger + .info( + c.getName() + " - " + c.getRole() + " - " + c.getSequence()); + }); + + } + + private void updateRanks(List contributors) { + boolean seqFound = false; + if (contributors + .stream() + .filter( + c -> c.getRole() != null && c.getSequence() != null && + c.getRole().equals("author") && (c.getSequence().equals("first") || + c.getSequence().equals("additional"))) + .count() > 0) { + seqFound = true; + logger.info("sequence data found"); + } + if (!seqFound) { + List seqIds = Arrays.asList(0); + contributors.forEach(c -> { + int currentSeq = seqIds.get(0) + 1; + seqIds.set(0, 
currentSeq); + c.setSequence(Integer.toString(seqIds.get(0))); + }); + } + } + + private void updateAuthorsSimpleMatch(List contributors, AuthorData author) { + contributors.forEach(c -> { + if (c.isSimpleMatch()) { + logger.info("simple match on : " + c.getCreditName()); + c.setName(author.getName()); + c.setSurname(author.getSurname()); + c.setOid(author.getOid()); + } + }); + updateRanks(contributors); + } + + private void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { + logger.info("inside updateAuthorsSimilarityMatch ..."); + contributors.forEach(c -> { + logger + .info( + c.getOid() + " - " + c.getCreditName() + " - " + + c.getName() + " - " + c.getSurname() + " - " + + c.getRole() + " - " + c.getSequence() + " - best: " + c.isBestMatch() + " - simpe: " + + c.isSimpleMatch()); + }); + + contributors + .stream() + .filter(c -> c.isBestMatch()) + .forEach(c -> { + logger.info("similarity match on : " + c.getCreditName()); + c.setName(author.getName()); + c.setSurname(author.getSurname()); + c.setOid(author.getOid()); + }); + updateRanks(contributors); + } + + @Test + @Ignore + public void authorSimilarityMatchTest() throws Exception { + logger.info("running authorSimilarityMatchTest ...."); + authorMatchTest("activity_work_0000-0003-2760-1191-similarity.xml"); + } + + @Test + private void authorSimpleMatchTest() throws Exception { + logger.info("running authorSimpleMatchTest ...."); + authorMatchTest("activity_work_0000-0003-2760-1191.xml"); + } + + private void authorMatchTest(String orcidWork) + throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + AuthorData author = new AuthorData(); + author.setName(nameA); + author.setSurname(surnameA); + author.setOid(orcidIdA); + String xml = IOUtils + .toString( + OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); + + if (xml == null) { + logger.info("Resource not found"); + } + XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); + if (p == 
null) { + logger.info("XMLRecordParserNoDoi null"); + } + WorkDataNoDoi workData = null; + try { + workData = p.VTDParseWorkData(xml.getBytes()); + } catch (Exception e) { + logger.error("parsing xml", e); + } + assertNotNull(workData); + int matchCounter = 0; + List matchCounters = Arrays.asList(matchCounter); + Contributor contributor = null; + workData.getContributors().forEach(c -> { + if (normalize(c.getCreditName()).contains(normalize(author.getName())) || + normalize(c.getCreditName()).contains(normalize(author.getSurname())) || + ((author.getOtherName() != null) + && normalize(c.getCreditName()).contains(normalize(author.getOtherName())))) { + matchCounters.set(0, matchCounters.get(0) + 1); + c.setSimpleMatch(true); + } + }); + logger.info("match counter: " + Integer.toString(matchCounters.get(0))); + if (matchCounters.get(0) == 1) { + updateAuthorsSimpleMatch(workData.getContributors(), author); + } else if (matchCounters.get(0) > 1) { + Optional optCon = workData + .getContributors() + .stream() + .filter(c -> c.isSimpleMatch()) + .map(c -> { + c.setScore(bestMatch(nameA, surnameA, c.getCreditName())); + logger.debug("nella map: " + c.getCreditName() + " score: " + c.getScore()); + return c; + }) + .filter(c -> c.getScore() >= threshold) + .max(Comparator.comparing(c -> c.getScore())); + Contributor bestMatchContributor = null; + if (optCon.isPresent()) { + bestMatchContributor = optCon.get(); + bestMatchContributor.setBestMatch(true); + logger.info("best match: " + bestMatchContributor.getCreditName()); + updateAuthorsSimilarityMatch(workData.getContributors(), author); + } + + } + + logger.info("UPDATED contributors: "); + workData.getContributors().forEach(c -> { + logger + .info( + c.getOid() + " - " + c.getCreditName() + " - " + + c.getName() + " - " + c.getSurname() + " - " + + c.getRole() + " - " + c.getSequence()); + }); + } +} + +// +// orcid_RDD = sc.textFile(ORCID_DUMP_PATH) +// no_doi_works_RDD = orcid_RDD.map(orcid_map).filter(lambda x:x is 
not None).map(lambda x: json.dumps(x)).saveAsTextFile(path=ORCID_OPENAIRE_PATH,compressionCodecClass="org.apache.hadoop.io.compress.GzipCodec") +// \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/activity_work_0000-0003-2760-1191.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/activity_work_0000-0003-2760-1191.xml new file mode 100644 index 000000000..485f4f8e8 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/activity_work_0000-0003-2760-1191.xml @@ -0,0 +1,106 @@ + + + 2016-12-12T23:02:05.233Z + 2016-12-13T09:08:16.412Z + + + https://orcid.org/0000-0002-9157-3431 + 0000-0002-9157-3431 + orcid.org + + Europe PubMed Central + + + Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which + Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for + ST-Segment-Elevation Myocardial Infarction. + + + formatted-unspecified + Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta + Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 
649-655, 2016 + + journal-article + + 2016 + 11 + + + + pmid + 27899851 + 27899851 + self + + + pmc + PMC5126442 + PMC5126442 + self + + + http://europepmc.org/abstract/med/27899851 + + + Abdel-Dayem K + + first + author + + + + Eweda II + + first + author + + + + El-Sherbiny A + + first + author + + + + Dimitry MO + + first + author + + + + Nammas W + + first + author + + + + diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/activity_work_0000-0002-5982-8983.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0002-5982-8983.xml similarity index 100% rename from dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/activity_work_0000-0002-5982-8983.xml rename to dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0002-5982-8983.xml diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191-similarity.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191-similarity.xml new file mode 100644 index 000000000..650d5a4cb --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191-similarity.xml @@ -0,0 +1,113 @@ + + + 2016-12-12T23:02:05.233Z + 2016-12-13T09:08:16.412Z + + + https://orcid.org/0000-0002-9157-3431 + 0000-0002-9157-3431 + orcid.org + + Europe PubMed Central + + + Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which + Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for + ST-Segment-Elevation Myocardial Infarction. + + + formatted-unspecified + Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta + Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 
649-655, 2016 + + journal-article + + 2016 + 11 + + + + pmid + 27899851 + 27899851 + self + + + pmc + PMC5126442 + PMC5126442 + self + + + http://europepmc.org/abstract/med/27899851 + + + Abdel-Dayem K + + first + author + + + + Abdel-Dayem Fake + + first + author + + + + Eweda II + + first + author + + + + El-Sherbiny A + + first + author + + + + Dimitry MO + + first + author + + + + Nammas W + + first + author + + + + diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml new file mode 100644 index 000000000..485f4f8e8 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml @@ -0,0 +1,106 @@ + + + 2016-12-12T23:02:05.233Z + 2016-12-13T09:08:16.412Z + + + https://orcid.org/0000-0002-9157-3431 + 0000-0002-9157-3431 + orcid.org + + Europe PubMed Central + + + Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which + Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for + ST-Segment-Elevation Myocardial Infarction. + + + formatted-unspecified + Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta + Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 
649-655, 2016 + + journal-article + + 2016 + 11 + + + + pmid + 27899851 + 27899851 + self + + + pmc + PMC5126442 + PMC5126442 + self + + + http://europepmc.org/abstract/med/27899851 + + + Abdel-Dayem K + + first + author + + + + Eweda II + + first + author + + + + El-Sherbiny A + + first + author + + + + Dimitry MO + + first + author + + + + Nammas W + + first + author + + + + From d6498278edc87aeb15ee61b33edf7f280829b56a Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 25 Jun 2020 18:43:29 +0200 Subject: [PATCH 002/108] added workflow to generate seq(orcidId,work) and seq(orcidId,enrichedWork) --- .../orcid/ActivitiesDecompressor.java | 2 +- .../doiboost/orcid/SummariesDecompressor.java | 2 +- .../doiboost/orcid/json/JsonHelper.java | 16 + .../orcidnodoi/ActivitiesDumpReader.java | 149 +++++ .../orcidnodoi/GenOrcidAuthorWork.java | 52 ++ .../SparkGenEnrichedOrcidWorks.java | 119 ++++ .../json/JsonWriter.java | 2 +- .../orcidnodoi/model/Contributor.java | 6 +- .../orcidnodoi/model/WorkDataNoDoi.java | 1 - .../orcidnodoi/similarity/AuthorMatcher.java | 204 +++++++ .../oozie_app/config-default.xml | 22 + .../oozie_app/workflow.xml | 524 ++++++++++++++++++ .../gen_enriched_orcid_works_parameters.json | 7 + .../orcidnodoi/xml/OrcidNoDoiTest.java | 250 +-------- 14 files changed, 1125 insertions(+), 231 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java rename dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/{orcid => orcidnodoi}/json/JsonWriter.java (94%) create mode 100644 
dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index 570fdef17..80ccd71a1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -19,7 +19,7 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; -import eu.dnetlib.doiboost.orcid.json.JsonWriter; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcid.model.WorkData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index f0bbb5c32..603bfedf6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -19,7 +19,7 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; -import eu.dnetlib.doiboost.orcid.json.JsonWriter; +import 
eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcid.model.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java new file mode 100644 index 000000000..13a3cee8f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.doiboost.orcid.json; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; + +public class JsonHelper { + + public static String createOidWork(WorkDataNoDoi workData) { + JsonObject oidWork = new JsonObject(); + oidWork.addProperty("oid", workData.getOid()); + oidWork.addProperty("work", new Gson().toJson(workData)); + return oidWork.toString(); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java new file mode 100644 index 000000000..7eb6faf54 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -0,0 +1,149 @@ + +package eu.dnetlib.doiboost.orcidnodoi; + +import eu.dnetlib.doiboost.orcid.json.JsonHelper; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.SequenceFile; +import 
org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.mortbay.log.Log; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URI; + +public class ActivitiesDumpReader { + + private static final int MAX_XML_WORKS_PARSED = -1; + private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) + throws Exception { + String uri = inputUri; + FileSystem fs = FileSystem.get(URI.create(uri), conf); + Path inputPath = new Path(uri); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + CompressionCodec codec = factory.getCodec(inputPath); + if (codec == null) { + System.err.println("No codec found for " + uri); + System.exit(1); + } + CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + InputStream gzipInputStream = null; + try { + gzipInputStream = codec.createInputStream(fs.open(inputPath)); + parseTarActivities(fs, conf, gzipInputStream, outputPath); + + } finally { + Log.debug("Closing gzip stream"); + IOUtils.closeStream(gzipInputStream); + } + } + + private static void parseTarActivities( + FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + int counter = 0; + int noDoiFound = 0; + int errorFromOrcidFound = 0; + int xmlParserErrorFound = 0; + try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) { + TarArchiveEntry entry = null; + + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(outputPath), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class))) { + while ((entry = tais.getNextTarEntry()) != null) { + String filename = entry.getName(); + + try { + if (entry.isDirectory() || 
!filename.contains("works")) { + + } else { + Log.debug("XML work entry name: " + entry.getName()); + counter++; + BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from + // tarInput + String line; + StringBuffer buffer = new StringBuffer(); + while ((line = br.readLine()) != null) { + buffer.append(line); + } + WorkDataNoDoi workDataNoDoi = XMLRecordParserNoDoi.VTDParseWorkData(buffer.toString().getBytes()); + if (workDataNoDoi != null) { + if (workDataNoDoi.getErrorCode() != null) { + errorFromOrcidFound += 1; + Log + .debug( + "error from Orcid with code " + + workDataNoDoi.getErrorCode() + + " for entry " + + entry.getName()); + continue; + } + boolean isDoiFound = workDataNoDoi.getExtIds().stream() + .filter(e -> e.getType()!=null) + .anyMatch(e -> e.getType().equals("doi")); + if (!isDoiFound) { + String jsonData = JsonHelper.createOidWork(workDataNoDoi); + Log.debug("oid: " + workDataNoDoi.getOid() + " data: " + jsonData); + + final Text key = new Text(workDataNoDoi.getOid()); + final Text value = new Text(jsonData); + + try { + writer.append(key, value); + } catch (IOException e) { + Log.debug("Writing to sequence file: " + e.getMessage()); + Log.debug(e); + throw new RuntimeException(e); + } + noDoiFound += 1; + } + + } else { + Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString()); + xmlParserErrorFound += 1; + } + } + } catch (Exception e) { + Log + .warn( + "Parsing work from tar archive and xml work: " + filename + " " + e.getMessage()); + Log.warn(e); + } + + if ((counter % XML_WORKS_PARSED_COUNTER_LOG_INTERVAL) == 0) { + Log.info("Current xml works parsed: " + counter); + } + + if ((MAX_XML_WORKS_PARSED > -1) && (counter > MAX_XML_WORKS_PARSED)) { + break; + } + } + } + } catch (IOException e) { + Log.warn("Parsing work from gzip archive: " + e.getMessage()); + Log.warn(e); + throw new RuntimeException(e); + } + Log.info("Activities parse completed"); + Log.info("Total XML works parsed: " 
+ counter); + Log.info("Total no doi work found: " + noDoiFound); + Log.info("Error from Orcid found: " + errorFromOrcidFound); + Log.info("Error parsing xml work found: " + xmlParserErrorFound); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java new file mode 100644 index 000000000..b82f4bc4c --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.doiboost.orcidnodoi; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.OrcidDSManager; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.mortbay.log.Log; + +import java.io.IOException; + +public class GenOrcidAuthorWork extends OrcidDSManager { + + private String activitiesFileNameTarGz; + private String outputWorksPath; + private String workingPath; + + public static void main(String[] args) throws IOException, Exception { + GenOrcidAuthorWork genOrcidAuthorWork = new GenOrcidAuthorWork(); + genOrcidAuthorWork.loadArgs(args); + genOrcidAuthorWork.generateAuthorsDOIsData(); + } + + public void generateAuthorsDOIsData() throws Exception { + Configuration conf = initConfigurationObject(); + FileSystem fs = initFileSystemObject(conf); + String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); + Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputWorksPath)); + ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); + } + + private void loadArgs(String[] args) throws IOException, Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GenOrcidAuthorWork.class + .getResourceAsStream( + 
"/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json"))); + parser.parseArgument(args); + + hdfsServerUri = parser.get("hdfsServerUri"); + Log.info("HDFS URI: " + hdfsServerUri); + workingPath = parser.get("workingPath"); + Log.info("Working Path: " + workingPath); + activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz"); + Log.info("Activities File Name: " + activitiesFileNameTarGz); + outputWorksPath = parser.get("outputWorksPath"); + Log.info("Output Author Work Data: " + outputWorksPath); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java new file mode 100644 index 000000000..6bb31bcf6 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -0,0 +1,119 @@ + +package eu.dnetlib.doiboost.orcidnodoi; + +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.IOException; +import java.util.Objects; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class 
SparkGenEnrichedOrcidWorks { + + public static void main(String[] args) throws IOException, Exception { + Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class); + logger.info("[ SparkGenerateDoiAuthorList STARTED]"); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkGenEnrichedOrcidWorks.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final String workingPath = parser.get("workingPath"); + logger.info("workingPath: ", workingPath); + final String outputEnrichedWorksPath = parser.get("outputEnrichedWorksPath"); + logger.info("outputEnrichedWorksPath: ", outputEnrichedWorksPath); + final String outputWorksPath = parser.get("outputWorksPath"); + logger.info("outputWorksPath: ", outputWorksPath); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaPairRDD summariesRDD = sc + .sequenceFile(workingPath + "../orcid_summaries/output/authors.seq", Text.class, Text.class); + Dataset summariesDataset = spark + .createDataset( + summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(), + Encoders.bean(AuthorData.class)); + + JavaPairRDD activitiesRDD = sc + .sequenceFile(workingPath + outputWorksPath + "works_X.seq" , Text.class, Text.class); + Dataset activitiesDataset = spark + .createDataset( + activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(), + Encoders.bean(WorkDataNoDoi.class)); + + activitiesDataset + .joinWith( + summariesDataset, + activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner") + .map( + 
(MapFunction, Tuple2>) value -> { + WorkDataNoDoi w = value._1; + AuthorData a = value._2; + AuthorMatcher.match(a, w.getContributors()); + return new Tuple2<>(a.getOid(), w); + }, + Encoders.tuple(Encoders.STRING(), Encoders.bean(WorkDataNoDoi.class))) + .filter(Objects::nonNull) + .toJavaRDD() + .saveAsTextFile(workingPath + outputEnrichedWorksPath);; + }); + } + + private static AuthorData loadAuthorFromJson(Text orcidId, Text json) { + AuthorData authorData = new AuthorData(); + authorData.setOid(orcidId.toString()); + JsonElement jElement = new JsonParser().parse(json.toString()); + authorData.setName(getJsonValue(jElement, "name")); + authorData.setSurname(getJsonValue(jElement, "surname")); + authorData.setCreditName(getJsonValue(jElement, "creditname")); + return authorData; + } + + private static WorkDataNoDoi loadWorkFromJson(Text orcidId, Text json) { + WorkDataNoDoi workData = new Gson().fromJson(json.toString(), WorkDataNoDoi.class); + return workData; + } + + private static String getJsonValue(JsonElement jElement, String property) { + if (jElement.getAsJsonObject().has(property)) { + JsonElement name = null; + name = jElement.getAsJsonObject().get(property); + if (name != null && !name.isJsonNull()) { + return name.getAsString(); + } + } + return null; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java similarity index 94% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonWriter.java rename to dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 35676d5ba..7f7e3a10a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -1,5 +1,5 @@ -package eu.dnetlib.doiboost.orcid.json; 
+package eu.dnetlib.doiboost.orcidnodoi.json; import com.google.gson.JsonObject; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java index 42076de5d..8a170de09 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java @@ -8,9 +8,9 @@ import eu.dnetlib.doiboost.orcid.model.AuthorData; public class Contributor extends AuthorData implements Serializable { private String sequence; private String role; - private boolean simpleMatch = false; - private Double score = 0.0; - private boolean bestMatch = false; + private transient boolean simpleMatch = false; + private transient Double score = 0.0; + private transient boolean bestMatch = false; public String getSequence() { return sequence; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java index ee13454e1..5756521e7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java @@ -97,5 +97,4 @@ public class WorkDataNoDoi implements Serializable { public void setContributors(List contributors) { this.contributors = contributors; } - } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java new file mode 100644 index 000000000..09fd8b36b --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -0,0 
+1,204 @@ + +package eu.dnetlib.doiboost.orcidnodoi.similarity; + +import java.io.IOException; +import java.text.Normalizer; +import java.util.*; + +import org.apache.commons.text.similarity.JaroWinklerSimilarity; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.ximpleware.NavException; +import com.ximpleware.ParseException; +import com.ximpleware.XPathEvalException; +import com.ximpleware.XPathParseException; + +import eu.dnetlib.dhp.parser.utility.VtdException; +import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; + +public class AuthorMatcher { + + private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); + private static final Double threshold = 0.8; + + public static void match(AuthorData author, List contributors) + throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + + int matchCounter = 0; + List matchCounters = Arrays.asList(matchCounter); + Contributor contributor = null; + contributors.forEach(c -> { + if (normalize(c.getCreditName()).contains(normalize(author.getName())) || + normalize(c.getCreditName()).contains(normalize(author.getSurname())) || + ((author.getOtherName() != null) + && normalize(c.getCreditName()).contains(normalize(author.getOtherName())))) { + matchCounters.set(0, matchCounters.get(0) + 1); + c.setSimpleMatch(true); + } + }); + logger.info("match counter: " + Integer.toString(matchCounters.get(0))); + if (matchCounters.get(0) == 1) { + updateAuthorsSimpleMatch(contributors, author); + } else if (matchCounters.get(0) > 1) { + Optional optCon = contributors + .stream() + .filter(c -> c.isSimpleMatch()) + .map(c -> { + c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); + logger.debug("nella map: " + c.getCreditName() + " 
score: " + c.getScore()); + return c; + }) + .filter(c -> c.getScore() >= threshold) + .max(Comparator.comparing(c -> c.getScore())); + Contributor bestMatchContributor = null; + if (optCon.isPresent()) { + bestMatchContributor = optCon.get(); + bestMatchContributor.setBestMatch(true); + logger.info("best match: " + bestMatchContributor.getCreditName()); + updateAuthorsSimilarityMatch(contributors, author); + } + + } + + logger.info("UPDATED contributors: "); + contributors.forEach(c -> { + logger + .info( + c.getOid() + " - " + c.getCreditName() + " - " + + c.getName() + " - " + c.getSurname() + " - " + + c.getRole() + " - " + c.getSequence()); + }); + } + + private static Double bestMatch(String authorSurname, String authorName, String contributor) { + logger.debug(authorSurname + " " + authorName + " vs " + contributor); + String[] contributorSplitted = contributor.split(" "); + if (contributorSplitted.length == 0) { + return 0.0; + } + final String contributorName = contributorSplitted[contributorSplitted.length - 1]; + String contributorSurname = ""; + if (contributorSplitted.length > 1) { + StringJoiner joiner = new StringJoiner(" "); + for (int i = 0; i < contributorSplitted.length - 1; i++) { + joiner.add(contributorSplitted[i]); + } + contributorSurname = joiner.toString(); + } + logger + .debug( + "contributorName: " + contributorName + + " contributorSurname: " + contributorSurname); + String authorNameNrm = normalize(authorName); + String authorSurnameNrm = normalize(authorSurname); + String contributorNameNrm = normalize(contributorName); + String contributorSurnameNrm = normalize(contributorSurname); + Double sm1 = similarity(authorNameNrm, authorSurnameNrm, contributorNameNrm, contributorSurnameNrm); + Double sm2 = similarity(authorNameNrm, authorSurnameNrm, contributorSurnameNrm, contributorNameNrm); + if (sm1.compareTo(sm2) >= 0) { + return sm1; + } + return sm2; + } + + private static Double similarity(String nameA, String surnameA, String nameB, 
String surnameB) { + Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); + logger + .debug(nameA + ", " + surnameA + " <> " + nameB + ", " + surnameB + " score: " + Double.toString(score)); + return score; + } + + private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { + return new JaroWinklerSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB))); + } + + private static String normalize(final String s) { + return nfd(s) + .toLowerCase() + // do not compact the regexes in a single expression, would cause StackOverflowError + // in case + // of large input strings + .replaceAll("(\\W)+", " ") + .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") + .replaceAll("(\\p{Punct})+", " ") + .replaceAll("(\\d)+", " ") + .replaceAll("(\\n)+", " ") + .trim(); + } + + private static String nfd(final String s) { + return Normalizer.normalize(s, Normalizer.Form.NFD); + } + + private static String parse(String name, String surname) { + return surname + " " + name; + } + + private static void updateAuthorsSimpleMatch(List contributors, AuthorData author) { + contributors.forEach(c -> { + if (c.isSimpleMatch()) { + logger.info("simple match on : " + c.getCreditName()); + c.setName(author.getName()); + c.setSurname(author.getSurname()); + c.setOid(author.getOid()); + } + }); + updateRanks(contributors); + } + + private static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { + logger.info("inside updateAuthorsSimilarityMatch ..."); + contributors.forEach(c -> { + logger + .info( + c.getOid() + " - " + c.getCreditName() + " - " + + c.getName() + " - " + c.getSurname() + " - " + + c.getRole() + " - " + c.getSequence() + " - best: " + c.isBestMatch() + " - simpe: " + + c.isSimpleMatch()); + }); + + contributors + .stream() + .filter(c -> c.isBestMatch()) + .forEach(c -> { + logger.info("similarity match on : " + c.getCreditName()); + 
c.setName(author.getName()); + c.setSurname(author.getSurname()); + c.setOid(author.getOid()); + }); + updateRanks(contributors); + } + + private static void updateRanks(List contributors) { + boolean seqFound = false; + if (contributors + .stream() + .filter( + c -> c.getRole() != null && c.getSequence() != null && + c.getRole().equals("author") && (c.getSequence().equals("first") || + c.getSequence().equals("additional"))) + .count() > 0) { + seqFound = true; + logger.info("sequence data found"); + } + if (!seqFound) { + List seqIds = Arrays.asList(0); + contributors.forEach(c -> { + int currentSeq = seqIds.get(0) + 1; + seqIds.set(0, currentSeq); + c.setSequence(Integer.toString(seqIds.get(0))); + }); + } + } + + private static String toJson(WorkDataNoDoi work) { + GsonBuilder builder = new GsonBuilder(); + Gson gson = builder.create(); + return gson.toJson(work); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml new file mode 100644 index 000000000..f2d51e260 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml @@ -0,0 +1,22 @@ + + + oozie.action.sharelib.for.java + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + oozie.launcher.mapreduce.map.java.opts + -Xmx4g + + + jobTracker + hadoop-rm3.garr-pa1.d4science.org:8032 + + + nameNode + hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml new file mode 100644 index 000000000..2486bdb24 --- /dev/null +++ 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml @@ -0,0 +1,524 @@ + + + + workingPath_activities + the working dir base path + + + shell_cmd_0 + wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz ; rm -f /tmp/ORCID_2019_activites_0.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 0 + + + shell_cmd_1 + wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz ; rm -f /tmp/ORCID_2019_activites_1.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 1 + + + shell_cmd_2 + wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz ; rm -f /tmp/ORCID_2019_activites_2.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 2 + + + shell_cmd_3 + wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz ; rm -f /tmp/ORCID_2019_activites_3.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 3 + + + shell_cmd_4 + wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz ; rm -f /tmp/ORCID_2019_activites_4.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 4 + + + shell_cmd_5 + wget -O 
/tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz ; rm -f /tmp/ORCID_2019_activites_5.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 5 + + + shell_cmd_6 + wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz ; rm -f /tmp/ORCID_2019_activites_6.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 6 + + + shell_cmd_7 + wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz ; rm -f /tmp/ORCID_2019_activites_7.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 7 + + + shell_cmd_8 + wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz ; rm -f /tmp/ORCID_2019_activites_8.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 8 + + + shell_cmd_9 + wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz ; rm -f /tmp/ORCID_2019_activites_9.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 9 + + + shell_cmd_X + wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz ; rm -f /tmp/ORCID_2019_activites_X.tar.gz + + 
the shell command that downloads and puts to hdfs orcid activity file X + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_0.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_0} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_0.tar.gz + -owno_doi_works/works_0.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_1.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_1} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_1.tar.gz + -owno_doi_works/works_1.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_2.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_2} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_2.tar.gz + -owno_doi_works/works_2.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_3.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_3} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_3.tar.gz + -owno_doi_works/works_3.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_4.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_4} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + 
-w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_4.tar.gz + -owno_doi_works/works_4.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_5.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_5} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_5.tar.gz + -owno_doi_works/works_5.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_6.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_6} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_6.tar.gz + -owno_doi_works/works_6.seq + + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_7.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_7} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_7.tar.gz + -owno_doi_works/works_7.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_8.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_8} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_8.tar.gz + -owno_doi_works/works_8.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_9.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_9} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_9.tar.gz + 
-owno_doi_works/works_9.seq + + + + + + + + + ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_X.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_X} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath_activities}/ + -n${nameNode} + -fORCID_2019_activites_X.tar.gz + -owno_doi_works/works_X.seq + + + + + + + + + + ${jobTracker} + ${nameNode} + yarn + cluster + Gen_Enriched_Orcid_Works + eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks + dhp-doiboost-1.2.3-SNAPSHOT.jar + --num-executors 10 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} + + -w${workingPath}/ + -owno_doi_works/ + -oewno_doi_enriched_works/ + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json new file mode 100644 index 000000000..c3a8f92ec --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json @@ -0,0 +1,7 @@ +[ + {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, + {"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true}, + {"paramName":"ow", "paramLongName":"outputWorksPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true}, + {"paramName":"oew", "paramLongName":"outputEnrichedWorksPath", 
"paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 31f8432ac..6a5faddbd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -1,15 +1,12 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; -import com.ximpleware.NavException; -import com.ximpleware.ParseException; -import com.ximpleware.XPathEvalException; -import com.ximpleware.XPathParseException; -import eu.dnetlib.dhp.parser.utility.VtdException; -import eu.dnetlib.doiboost.orcid.model.AuthorData; -import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; -import jdk.nashorn.internal.ir.annotations.Ignore; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; +import java.text.Normalizer; +import java.util.*; + import org.apache.commons.io.IOUtils; import org.apache.commons.text.similarity.JaccardSimilarity; import org.apache.commons.text.similarity.JaroWinklerSimilarity; @@ -17,11 +14,20 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.text.Normalizer; -import java.util.*; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.ximpleware.NavException; +import com.ximpleware.ParseException; +import com.ximpleware.XPathEvalException; +import com.ximpleware.XPathParseException; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import eu.dnetlib.dhp.parser.utility.VtdException; +import eu.dnetlib.dhp.schema.oaf.Author; +import 
eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; +import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidNoDoiTest { @@ -33,100 +39,10 @@ public class OrcidNoDoiTest { String nameB = "K"; String surnameB = "Abdel-Dayem"; String orcidIdA = "0000-0003-2760-1191"; - Double threshold = 0.8; @Test @Ignore - private void similarityTest() throws Exception { - logger.info("running testSimilarity ...."); - logger - .info( - "JaroWinklerSimilarity: " - + Double.toString(similarityJaroWinkler(nameA, surnameA, nameB, surnameB))); - logger - .info( - "JaccardSimilarity: " + Double.toString(similarityJaccard(nameA, surnameA, nameB, surnameB))); - } - - @Test - @Ignore - private void bestMatchTest() throws Exception { - logger.info("running bestMatchTest ...."); - String contributor = surnameB + ", " + nameB; - logger.info("score: " + Double.toString(bestMatch(surnameA, nameA, contributor))); - } - - private static Double bestMatch(String authorSurname, String authorName, String contributor) { - logger.debug(authorSurname + " " + authorName + " vs " + contributor); - String[] contributorSplitted = contributor.split(" "); - if (contributorSplitted.length == 0) { - return 0.0; - } - final String contributorName = contributorSplitted[contributorSplitted.length - 1]; - String contributorSurname = ""; - if (contributorSplitted.length > 1) { - StringJoiner joiner = new StringJoiner(" "); - for (int i = 0; i < contributorSplitted.length - 1; i++) { - joiner.add(contributorSplitted[i]); - } - contributorSurname = joiner.toString(); - } - logger - .debug( - "contributorName: " + contributorName + - " contributorSurname: " + contributorSurname); - String authorNameNrm = normalize(authorName); - String authorSurnameNrm = normalize(authorSurname); - String contributorNameNrm = 
normalize(contributorName); - String contributorSurnameNrm = normalize(contributorSurname); - Double sm1 = similarity(authorNameNrm, authorSurnameNrm, contributorNameNrm, contributorSurnameNrm); - Double sm2 = similarity(authorNameNrm, authorSurnameNrm, contributorSurnameNrm, contributorNameNrm); - if (sm1.compareTo(sm2) >= 0) { - return sm1; - } - return sm2; - } - - private static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { - Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - logger - .debug(nameA + ", " + surnameA + " <> " + nameB + ", " + surnameB + " score: " + Double.toString(score)); - return score; - } - - private static Double similarityJaccard(String nameA, String surnameA, String nameB, String surnameB) { - return new JaccardSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB))); - } - - private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { - return new JaroWinklerSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB))); - } - - private static String parse(String name, String surname) { - return surname + " " + name; - } - - private static String normalize(final String s) { - return nfd(s) - .toLowerCase() - // do not compact the regexes in a single expression, would cause StackOverflowError - // in case - // of large input strings - .replaceAll("(\\W)+", " ") - .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") - .replaceAll("(\\p{Punct})+", " ") - .replaceAll("(\\d)+", " ") - .replaceAll("(\\n)+", " ") - .trim(); - } - - private static String nfd(final String s) { - return Normalizer.normalize(s, Normalizer.Form.NFD); - } - - @Test - @Ignore - public void readPublicationFieldsTest() + private void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running 
loadPublicationFieldsTest ...."); String xml = IOUtils @@ -178,78 +94,10 @@ public class OrcidNoDoiTest { } - private void updateRanks(List contributors) { - boolean seqFound = false; - if (contributors - .stream() - .filter( - c -> c.getRole() != null && c.getSequence() != null && - c.getRole().equals("author") && (c.getSequence().equals("first") || - c.getSequence().equals("additional"))) - .count() > 0) { - seqFound = true; - logger.info("sequence data found"); - } - if (!seqFound) { - List seqIds = Arrays.asList(0); - contributors.forEach(c -> { - int currentSeq = seqIds.get(0) + 1; - seqIds.set(0, currentSeq); - c.setSequence(Integer.toString(seqIds.get(0))); - }); - } - } - - private void updateAuthorsSimpleMatch(List contributors, AuthorData author) { - contributors.forEach(c -> { - if (c.isSimpleMatch()) { - logger.info("simple match on : " + c.getCreditName()); - c.setName(author.getName()); - c.setSurname(author.getSurname()); - c.setOid(author.getOid()); - } - }); - updateRanks(contributors); - } - - private void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { - logger.info("inside updateAuthorsSimilarityMatch ..."); - contributors.forEach(c -> { - logger - .info( - c.getOid() + " - " + c.getCreditName() + " - " + - c.getName() + " - " + c.getSurname() + " - " + - c.getRole() + " - " + c.getSequence() + " - best: " + c.isBestMatch() + " - simpe: " - + c.isSimpleMatch()); - }); - - contributors - .stream() - .filter(c -> c.isBestMatch()) - .forEach(c -> { - logger.info("similarity match on : " + c.getCreditName()); - c.setName(author.getName()); - c.setSurname(author.getSurname()); - c.setOid(author.getOid()); - }); - updateRanks(contributors); - } - @Test - @Ignore - public void authorSimilarityMatchTest() throws Exception { - logger.info("running authorSimilarityMatchTest ...."); - authorMatchTest("activity_work_0000-0003-2760-1191-similarity.xml"); - } - - @Test - private void authorSimpleMatchTest() throws Exception { + public 
void authorMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); - authorMatchTest("activity_work_0000-0003-2760-1191.xml"); - } - - private void authorMatchTest(String orcidWork) - throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); author.setName(nameA); author.setSurname(surnameA); @@ -272,55 +120,9 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } assertNotNull(workData); - int matchCounter = 0; - List matchCounters = Arrays.asList(matchCounter); - Contributor contributor = null; - workData.getContributors().forEach(c -> { - if (normalize(c.getCreditName()).contains(normalize(author.getName())) || - normalize(c.getCreditName()).contains(normalize(author.getSurname())) || - ((author.getOtherName() != null) - && normalize(c.getCreditName()).contains(normalize(author.getOtherName())))) { - matchCounters.set(0, matchCounters.get(0) + 1); - c.setSimpleMatch(true); - } - }); - logger.info("match counter: " + Integer.toString(matchCounters.get(0))); - if (matchCounters.get(0) == 1) { - updateAuthorsSimpleMatch(workData.getContributors(), author); - } else if (matchCounters.get(0) > 1) { - Optional optCon = workData - .getContributors() - .stream() - .filter(c -> c.isSimpleMatch()) - .map(c -> { - c.setScore(bestMatch(nameA, surnameA, c.getCreditName())); - logger.debug("nella map: " + c.getCreditName() + " score: " + c.getScore()); - return c; - }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); - Contributor bestMatchContributor = null; - if (optCon.isPresent()) { - bestMatchContributor = optCon.get(); - bestMatchContributor.setBestMatch(true); - logger.info("best match: " + bestMatchContributor.getCreditName()); - updateAuthorsSimilarityMatch(workData.getContributors(), author); - } - - } - - logger.info("UPDATED 
contributors: "); - workData.getContributors().forEach(c -> { - logger - .info( - c.getOid() + " - " + c.getCreditName() + " - " + - c.getName() + " - " + c.getSurname() + " - " + - c.getRole() + " - " + c.getSequence()); - }); + AuthorMatcher.match(author, workData.getContributors()); + GsonBuilder builder = new GsonBuilder(); + Gson gson = builder.create(); + logger.info(gson.toJson(workData)); } } - -// -// orcid_RDD = sc.textFile(ORCID_DUMP_PATH) -// no_doi_works_RDD = orcid_RDD.map(orcid_map).filter(lambda x:x is not None).map(lambda x: json.dumps(x)).saveAsTextFile(path=ORCID_OPENAIRE_PATH,compressionCodecClass="org.apache.hadoop.io.compress.GzipCodec") -// \ No newline at end of file From b2213b6435dd3180adff6a7546e9f03337e8056c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 26 Jun 2020 17:27:34 +0200 Subject: [PATCH 003/108] merged with dnet version --- .../orcid/ActivitiesDecompressor.java | 2 +- .../doiboost/orcid/SummariesDecompressor.java | 2 +- .../doiboost/orcid/json/JsonHelper.java | 1 + .../orcidnodoi/ActivitiesDumpReader.java | 30 +- .../orcidnodoi/GenOrcidAuthorWork.java | 7 +- .../SparkGenEnrichedOrcidWorks.java | 59 +-- .../orcidnodoi/proto/ProtoWriter.java | 427 ++++++++++++++++++ .../oozie_app/workflow.xml | 2 +- 8 files changed, 483 insertions(+), 47 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index 80ccd71a1..02d2b267b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -19,9 +19,9 @@ import org.apache.hadoop.io.compress.CompressionCodec; import 
org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; -import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcid.model.WorkData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; public class ActivitiesDecompressor { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index 603bfedf6..29d72ed0b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -19,9 +19,9 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; -import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcid.model.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; public class SummariesDecompressor { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index 13a3cee8f..bfd6f7447 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid.json; import com.google.gson.Gson; import com.google.gson.JsonObject; + import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; public class JsonHelper { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 7eb6faf54..506641b81 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -1,10 +1,12 @@ package eu.dnetlib.doiboost.orcidnodoi; -import eu.dnetlib.doiboost.orcid.json.JsonHelper; -import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; -import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URI; + import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.hadoop.conf.Configuration; @@ -17,11 +19,10 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.URI; +import eu.dnetlib.doiboost.orcid.json.JsonHelper; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class ActivitiesDumpReader { @@ -82,7 +83,8 @@ public class ActivitiesDumpReader { while ((line = br.readLine()) != null) { buffer.append(line); } - WorkDataNoDoi workDataNoDoi = XMLRecordParserNoDoi.VTDParseWorkData(buffer.toString().getBytes()); + WorkDataNoDoi workDataNoDoi = XMLRecordParserNoDoi + .VTDParseWorkData(buffer.toString().getBytes()); if (workDataNoDoi != null) { if (workDataNoDoi.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -94,9 +96,11 @@ public class 
ActivitiesDumpReader { + entry.getName()); continue; } - boolean isDoiFound = workDataNoDoi.getExtIds().stream() - .filter(e -> e.getType()!=null) - .anyMatch(e -> e.getType().equals("doi")); + boolean isDoiFound = workDataNoDoi + .getExtIds() + .stream() + .filter(e -> e.getType() != null) + .anyMatch(e -> e.getType().equals("doi")); if (!isDoiFound) { String jsonData = JsonHelper.createOidWork(workDataNoDoi); Log.debug("oid: " + workDataNoDoi.getOid() + " data: " + jsonData); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index b82f4bc4c..bbaa5acca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -1,15 +1,16 @@ package eu.dnetlib.doiboost.orcidnodoi; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcid.OrcidDSManager; +import java.io.IOException; + import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; -import java.io.IOException; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.OrcidDSManager; public class GenOrcidAuthorWork extends OrcidDSManager { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 6bb31bcf6..9d9c5bc4a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -1,13 
+1,12 @@ package eu.dnetlib.doiboost.orcidnodoi; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcid.model.AuthorData; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; -import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.spark.SparkConf; @@ -19,14 +18,17 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; -import java.io.IOException; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class SparkGenEnrichedOrcidWorks { public static void main(String[] args) throws IOException, Exception { @@ -67,27 +69,28 @@ public class SparkGenEnrichedOrcidWorks { Encoders.bean(AuthorData.class)); JavaPairRDD activitiesRDD = sc - .sequenceFile(workingPath + outputWorksPath + "works_X.seq" , Text.class, Text.class); + .sequenceFile(workingPath + outputWorksPath + "works_X.seq", Text.class, Text.class); Dataset activitiesDataset = spark .createDataset( activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(), Encoders.bean(WorkDataNoDoi.class)); activitiesDataset - .joinWith( - summariesDataset, - 
activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner") - .map( - (MapFunction, Tuple2>) value -> { - WorkDataNoDoi w = value._1; - AuthorData a = value._2; - AuthorMatcher.match(a, w.getContributors()); - return new Tuple2<>(a.getOid(), w); - }, - Encoders.tuple(Encoders.STRING(), Encoders.bean(WorkDataNoDoi.class))) - .filter(Objects::nonNull) - .toJavaRDD() - .saveAsTextFile(workingPath + outputEnrichedWorksPath);; + .joinWith( + summariesDataset, + activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner") + .map( + (MapFunction, Tuple2>) value -> { + WorkDataNoDoi w = value._1; + AuthorData a = value._2; + AuthorMatcher.match(a, w.getContributors()); + return new Tuple2<>(a.getOid(), w); + }, + Encoders.tuple(Encoders.STRING(), Encoders.bean(WorkDataNoDoi.class))) + .filter(Objects::nonNull) + .toJavaRDD() + .saveAsTextFile(workingPath + outputEnrichedWorksPath); + ; }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java new file mode 100644 index 000000000..01b172359 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java @@ -0,0 +1,427 @@ + +package eu.dnetlib.doiboost.orcidnodoi.proto; + +public class ProtoWriter { + +} +// +//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getArrayValues; +//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getDefaultResulttype; +//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getQualifier; +//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getStringValue; +//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.isValidDate; +// +//import java.io.IOException; +//import java.io.InputStream; +//import java.util.ArrayList; +//import 
java.util.HashMap; +//import java.util.List; +//import java.util.Map; +// +//import org.apache.commons.io.IOUtils; +//import org.apache.commons.lang3.StringUtils; +// +//import com.google.gson.Gson; +//import com.google.gson.JsonArray; +//import com.google.gson.JsonElement; +//import com.google.gson.JsonObject; +//import com.googlecode.protobuf.format.JsonFormat; +// +//import eu.dnetlib.actionmanager.actions.ActionFactory; +//import eu.dnetlib.actionmanager.actions.AtomicAction; +//import eu.dnetlib.actionmanager.common.Agent; +//import eu.dnetlib.data.mapreduce.hbase.Reporter; +//import eu.dnetlib.data.mapreduce.util.StreamUtils; +//import eu.dnetlib.data.proto.FieldTypeProtos; +//import eu.dnetlib.data.proto.FieldTypeProtos.Author; +//import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; +//import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; +//import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; +//import eu.dnetlib.data.proto.FieldTypeProtos.StringField; +//import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; +//import eu.dnetlib.data.proto.KindProtos; +//import eu.dnetlib.data.proto.OafProtos; +//import eu.dnetlib.data.proto.ResultProtos; +//import eu.dnetlib.data.proto.TypeProtos; +//import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; +//import eu.dnetlib.miscutils.collections.Pair; +//import eu.dnetlib.miscutils.datetime.DateUtils; +//import eu.dnetlib.pace.model.Person; +// +//public class ProtoWriter { +// +// public static final String ORCID = "ORCID"; +// public final static String orcidPREFIX = "orcid_______"; +// public static final String OPENAIRE_PREFIX = "openaire____"; +// public static final String SEPARATOR = "::"; +// +// private static Map> datasources = new HashMap>() { +// +// { +// put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); +// +// } +// }; +// +// // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname +// private static Map> externalIds = new 
HashMap>() { +// +// { +// put("ark".toLowerCase(), new Pair<>("ark", "ark")); +// put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); +// put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); +// put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); +// put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); +// put("urn".toLowerCase(), new Pair<>("urn", "urn")); +// } +// }; +// +// static Map> typologiesMapping; +// +// static { +// try { +// final InputStream is = OrcidToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies_orcid.json"); +// final String tt = IOUtils.toString(is); +// typologiesMapping = new Gson().fromJson(tt, Map.class); +// } catch (final IOException e) { +// e.printStackTrace(); +// } +// } +// +// public static final String PID_TYPES = "dnet:pid_types"; +// +// public static List generatePublicationActionsFromDump(final JsonObject rootElement, +// final ActionFactory factory, +// final String setName, +// final Agent agent, +// final Reporter context) { +// +// if (!isValid(rootElement, context)) { return null; } +// +// // Create OAF proto +// +// final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder(); +// +// oaf.setDataInfo( +// DataInfo.newBuilder() +// .setDeletedbyinference(false) +// .setInferred(false) +// .setTrust("0.9") +// .setProvenanceaction(getQualifier("sysimport:actionset:orcidworks-no-doi", "dnet:provenanceActions")) +// .build()); +// +// // Adding kind +// oaf.setKind(KindProtos.Kind.entity); +// +// oaf.setLastupdatetimestamp(DateUtils.now()); +// +// // creating result proto +// final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result); +// +// entity.setDateofcollection("2018-10-22"); +// entity.setDateoftransformation(DateUtils.now_ISO8601()); +// +// // Adding external ids +// StreamUtils.toStream(externalIds.keySet().iterator()) +// .forEach(jsonExtId -> { +// final String classid = 
externalIds.get(jsonExtId.toLowerCase()).getValue(); +// final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); +// final String extId = getStringValue(rootElement, jsonExtId); +// if (StringUtils.isNotBlank(extId)) { +// entity.addPid(StructuredProperty.newBuilder() +// .setValue(extId) +// .setQualifier(Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid("dnet:pid_types") +// .setSchemename("dnet:pid_types").build()) +// .build()); +// } +// }); +// +// // Create result field +// final ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder(); +// +// // Create metadata proto +// final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder(); +// +// // Adding source +// final String source = getStringValue(rootElement, "source"); +// if (StringUtils.isNotBlank(source)) { +// metadata.addSource(StringField.newBuilder().setValue(source).build()); +// } +// +// // Adding title +// final String title = createRepeatedField(rootElement, "titles"); +// if (StringUtils.isBlank(title)) { +// context.incrementCounter("filtered", "title_not_found", 1); +// return null; +// } +// metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder() +// .setValue(title) +// .setQualifier(getQualifier("main title", "dnet:dataCite_title")) +// .build()); +// +// // Adding identifier +// final String id = getStringValue(rootElement, "id"); +// String sourceId = null; +// if (id != null) { +// entity.addOriginalId(id); +// sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, AbstractDNetXsltFunctions.md5(id)); +// } else { +// sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, AbstractDNetXsltFunctions.md5(title)); +// } +// entity.setId(sourceId); +// +// // Adding relevant date +// settingRelevantDate(rootElement, metadata, "publication_date", "issued", true); +// +// // Adding collectedfrom +// final FieldTypeProtos.KeyValue collectedFrom = 
FieldTypeProtos.KeyValue.newBuilder() +// .setValue(ORCID) +// .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a") +// .build(); +// entity.addCollectedfrom(collectedFrom); +// +// // Adding type +// final String type = getStringValue(rootElement, "type"); +// String cobjValue = ""; +// if (StringUtils.isNotBlank(type)) { +// +// metadata.setResourcetype(FieldTypeProtos.Qualifier.newBuilder() +// .setClassid(type) +// .setClassname(type) +// .setSchemeid("dnet:dataCite_resource") +// .setSchemename("dnet:dataCite_resource") +// .build()); +// +// final String typeValue = typologiesMapping.get(type).get("value"); +// cobjValue = typologiesMapping.get(type).get("cobj"); +// final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder(); +// +// // Adding hostedby +// instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder() +// .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c") +// .setValue("Unknown Repository") +// .build()); +// +// // Adding url +// final String url = createRepeatedField(rootElement, "urls"); +// if (StringUtils.isNotBlank(url)) { +// instance.addUrl(url); +// } +// +// final String pubDate = getPublicationDate(rootElement, "publication_date"); +// if (StringUtils.isNotBlank(pubDate)) { +// instance.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(pubDate).build()); +// } +// +// instance.setCollectedfrom(collectedFrom); +// +// // Adding accessright +// instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder() +// .setClassid("UNKNOWN") +// .setClassname("UNKNOWN") +// .setSchemeid("dnet:access_modes") +// .setSchemename("dnet:access_modes") +// .build()); +// +// // Adding type +// instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder() +// .setClassid(cobjValue) +// .setClassname(typeValue) +// .setSchemeid("dnet:publication_resource") +// .setSchemename("dnet:publication_resource") +// .build()); +// +// 
result.addInstance(instance); +// } else { +// context.incrementCounter("filtered", "type_not_found", 1); +// return null; +// } +// +// // Adding authors +// final List authors = createAuthors(rootElement); +// if (authors != null && authors.size() > 0) { +// metadata.addAllAuthor(authors); +// } else { +// context.incrementCounter("filtered", "author_not_found", 1); +// return null; +// } +// +// metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies")); +// result.setMetadata(metadata.build()); +// entity.setResult(result.build()); +// oaf.setEntity(entity.build()); +// +// final List actionList = new ArrayList<>(); +// +// actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray())); +// +//// System.out.println(JsonFormat.printToString(oaf.build())); +// return actionList; +// +// } +// +// public static List createAuthors(final JsonObject root) { +// +// final String authorsJSONFieldName = "authors"; +// +// if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) { +// +// final List authors = new ArrayList<>(); +// final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); +// int firstCounter = 0; +// int defaultCounter = 0; +// int rank = 1; +// int currentRank = 0; +// +// for (final JsonElement item : jsonAuthors) { +// final JsonObject author = item.getAsJsonObject(); +// final Author.Builder result = Author.newBuilder(); +// if (item.isJsonObject()) { +// final String surname = getStringValue(author, "surname"); +// final String name = getStringValue(author, "name"); +// final String oid = getStringValue(author, "oid"); +// final String seq = getStringValue(author, "seq"); +// if (StringUtils.isNotBlank(seq)) { +// if (seq.equals("first")) { +// firstCounter += 1; +// rank = firstCounter; +// +// } else if (seq.equals("additional")) { +// rank = currentRank + 1; +// } else { +// defaultCounter += 1; +// 
rank = defaultCounter; +// } +// } +// +// if (StringUtils.isNotBlank(oid)) { +// result.addPid(KeyValue.newBuilder() +// .setValue(oid) +// .setKey("ORCID") +// .build()); +// result.setFullname(name + " " + surname); +// if (StringUtils.isNotBlank(name)) { +// result.setName(name); +// } +// if (StringUtils.isNotBlank(surname)) { +// result.setSurname(surname); +// } +// } else { +// String fullname = ""; +// if (StringUtils.isNotBlank(name)) { +// fullname = name; +// } else { +// if (StringUtils.isNotBlank(surname)) { +// fullname = surname; +// } +// } +// Person p = new Person(fullname, false); +// if (p.isAccurate()) { +// result.setName(p.getNormalisedFirstName()); +// result.setSurname(p.getNormalisedSurname()); +// result.setFullname(p.getNormalisedFullname()); +// } +// else { +// result.setFullname(fullname); +// } +// } +// } +// result.setRank(rank); +// authors.add(result.build()); +// currentRank = rank; +// } +// return authors; +// +// } +// return null; +// } +// +// private static String createRepeatedField(final JsonObject rootElement, final String fieldName) { +// String field = ""; +// if (!rootElement.has(fieldName)) { return null; } +// if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; } +// if (rootElement.get(fieldName).isJsonArray()) { +// if (!isValidJsonArray(rootElement, fieldName)) { return null; } +// final StringBuilder ttl = new StringBuilder(); +// getArrayValues(rootElement, fieldName).forEach(ttl::append); +// field = ttl.toString(); +// } else { +// field = getStringValue(rootElement, fieldName); +// } +// +// if (field != null && !field.isEmpty() && field.charAt(0) == '"' && field.charAt(field.length() - 1) == '"') { +// field = field.substring(1, field.length() - 1); +// } +// return field; +// } +// +// private static void settingRelevantDate(final JsonObject rootElement, +// final ResultProtos.Result.Metadata.Builder metadata, +// final String jsonKey, +// final String dictionaryKey, 
+// final boolean addToDateOfAcceptance) { +// +// final String pubDate = getPublicationDate(rootElement, "publication_date"); +// if (StringUtils.isNotBlank(pubDate)) { +// if (addToDateOfAcceptance) { +// metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(pubDate).build()); +// } +// metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder() +// .setValue(pubDate) +// .setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date")) +// .build()); +// } +// } +// +// private static String getPublicationDate(final JsonObject rootElement, +// final String jsonKey) { +// +// final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); +// if (pubDateJson == null) { return null; } +// final String year = getStringValue(pubDateJson, "year"); +// final String month = getStringValue(pubDateJson, "month"); +// final String day = getStringValue(pubDateJson, "day"); +// +// if (StringUtils.isBlank(year)) { return null; } +// String pubDate = "".concat(year); +// if (StringUtils.isNotBlank(month)) { +// pubDate = pubDate.concat("-" + month); +// if (StringUtils.isNotBlank(day)) { +// pubDate = pubDate.concat("-" + day); +// } else { +// pubDate += "-01"; +// } +// } else { +// pubDate += "-01-01"; +// } +// if (isValidDate(pubDate)) { return pubDate; } +// return null; +// } +// +// protected static boolean isValid(final JsonObject rootElement, final Reporter context) { +// +// final String type = getStringValue(rootElement, "type"); +// if (!typologiesMapping.containsKey(type)) { +// context.incrementCounter("filtered", "unknowntype_" + type, 1); +// return false; +// } +// +// if (!isValidJsonArray(rootElement, "titles")) { +// context.incrementCounter("filtered", "invalid_title", 1); +// return false; +// } +// return true; +// } +// +// private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { +// if (!rootElement.has(fieldName)) { return false; } +// final JsonElement jsonElement = 
rootElement.get(fieldName); +// if (jsonElement.isJsonNull()) { return false; } +// if (jsonElement.isJsonArray()) { +// final JsonArray jsonArray = jsonElement.getAsJsonArray(); +// if (jsonArray.isJsonNull()) { return false; } +// if (jsonArray.get(0).isJsonNull()) { return false; } +// } +// return true; +// } +//} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml index 2486bdb24..33fbdf875 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml @@ -509,7 +509,7 @@ cluster Gen_Enriched_Orcid_Works eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks - dhp-doiboost-1.2.3-SNAPSHOT.jar + dhp-doiboost-1.2.2-SNAPSHOT.jar --num-executors 10 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} -w${workingPath}/ From b7b6be12a51c81b2b7469684cf18bc8a3014aec4 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 29 Jun 2020 18:03:16 +0200 Subject: [PATCH 004/108] fixed enriched works generation --- .../doiboost/orcid/json/JsonHelper.java | 6 +-- .../orcidnodoi/ActivitiesDumpReader.java | 4 +- .../orcidnodoi/GenOrcidAuthorWork.java | 1 + .../SparkGenEnrichedOrcidWorks.java | 29 +++++------ .../orcidnodoi/similarity/AuthorMatcher.java | 48 +++++-------------- .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 4 +- .../oozie_app/config-default.xml | 17 +++++-- .../oozie_app/workflow.xml | 24 +++++++--- 8 files changed, 66 insertions(+), 67 deletions(-) diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index bfd6f7447..94f7d8c91 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -2,16 +2,12 @@ package eu.dnetlib.doiboost.orcid.json; import com.google.gson.Gson; -import com.google.gson.JsonObject; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; public class JsonHelper { public static String createOidWork(WorkDataNoDoi workData) { - JsonObject oidWork = new JsonObject(); - oidWork.addProperty("oid", workData.getOid()); - oidWork.addProperty("work", new Gson().toJson(workData)); - return oidWork.toString(); + return new Gson().toJson(workData); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 506641b81..bf63568d8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -26,8 +26,8 @@ import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class ActivitiesDumpReader { - private static final int MAX_XML_WORKS_PARSED = -1; - private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + private static final int MAX_XML_WORKS_PARSED = 100; + private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 10; public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws Exception { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index bbaa5acca..8dcee796c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -45,6 +45,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager { Log.info("HDFS URI: " + hdfsServerUri); workingPath = parser.get("workingPath"); Log.info("Working Path: " + workingPath); + hdfsOrcidDefaultPath = workingPath; activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz"); Log.info("Activities File Name: " + activitiesFileNameTarGz); outputWorksPath = parser.get("outputWorksPath"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 9d9c5bc4a..ae1e4dae6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -24,6 +24,7 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.json.JsonHelper; import eu.dnetlib.doiboost.orcid.model.AuthorData; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; @@ -31,9 +32,9 @@ import scala.Tuple2; public class SparkGenEnrichedOrcidWorks { + static Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class); + public static void main(String[] args) throws IOException, Exception { - Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class); - logger.info("[ SparkGenerateDoiAuthorList STARTED]"); final 
ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -46,13 +47,9 @@ public class SparkGenEnrichedOrcidWorks { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); final String outputEnrichedWorksPath = parser.get("outputEnrichedWorksPath"); - logger.info("outputEnrichedWorksPath: ", outputEnrichedWorksPath); final String outputWorksPath = parser.get("outputWorksPath"); - logger.info("outputWorksPath: ", outputWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -67,30 +64,33 @@ public class SparkGenEnrichedOrcidWorks { .createDataset( summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(), Encoders.bean(AuthorData.class)); + logger.info("Authors data loaded: " + summariesDataset.count()); JavaPairRDD activitiesRDD = sc - .sequenceFile(workingPath + outputWorksPath + "works_X.seq", Text.class, Text.class); + .sequenceFile(workingPath + outputWorksPath + "*.seq", Text.class, Text.class); Dataset activitiesDataset = spark .createDataset( activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(), Encoders.bean(WorkDataNoDoi.class)); + logger.info("Works data loaded: " + activitiesDataset.count()); - activitiesDataset + JavaRDD> enrichedWorksRDD = activitiesDataset .joinWith( summariesDataset, activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner") .map( - (MapFunction, Tuple2>) value -> { + (MapFunction, Tuple2>) value -> { WorkDataNoDoi w = value._1; AuthorData a = value._2; AuthorMatcher.match(a, w.getContributors()); - return new Tuple2<>(a.getOid(), w); + return new Tuple2<>(a.getOid(), JsonHelper.createOidWork(w)); }, - Encoders.tuple(Encoders.STRING(), Encoders.bean(WorkDataNoDoi.class))) + Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .filter(Objects::nonNull) - 
.toJavaRDD() - .saveAsTextFile(workingPath + outputEnrichedWorksPath); - ; + .toJavaRDD(); + logger.info("Works enriched data created: " + enrichedWorksRDD.count()); + enrichedWorksRDD.repartition(10).saveAsTextFile(workingPath + outputEnrichedWorksPath); + logger.info("Works enriched data saved"); }); } @@ -105,6 +105,7 @@ public class SparkGenEnrichedOrcidWorks { } private static WorkDataNoDoi loadWorkFromJson(Text orcidId, Text json) { + WorkDataNoDoi workData = new Gson().fromJson(json.toString(), WorkDataNoDoi.class); return workData; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index 09fd8b36b..1e4c38bef 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -33,15 +33,13 @@ public class AuthorMatcher { List matchCounters = Arrays.asList(matchCounter); Contributor contributor = null; contributors.forEach(c -> { - if (normalize(c.getCreditName()).contains(normalize(author.getName())) || - normalize(c.getCreditName()).contains(normalize(author.getSurname())) || - ((author.getOtherName() != null) - && normalize(c.getCreditName()).contains(normalize(author.getOtherName())))) { + if (simpleMatch(c.getCreditName(), author.getName()) || + simpleMatch(c.getCreditName(), author.getSurname()) || + simpleMatch(c.getCreditName(), author.getOtherName())) { matchCounters.set(0, matchCounters.get(0) + 1); c.setSimpleMatch(true); } }); - logger.info("match counter: " + Integer.toString(matchCounters.get(0))); if (matchCounters.get(0) == 1) { updateAuthorsSimpleMatch(contributors, author); } else if (matchCounters.get(0) > 1) { @@ -50,7 +48,6 @@ public class AuthorMatcher { .filter(c -> c.isSimpleMatch()) .map(c -> { 
c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); - logger.debug("nella map: " + c.getCreditName() + " score: " + c.getScore()); return c; }) .filter(c -> c.getScore() >= threshold) @@ -59,24 +56,21 @@ public class AuthorMatcher { if (optCon.isPresent()) { bestMatchContributor = optCon.get(); bestMatchContributor.setBestMatch(true); - logger.info("best match: " + bestMatchContributor.getCreditName()); updateAuthorsSimilarityMatch(contributors, author); } } - logger.info("UPDATED contributors: "); - contributors.forEach(c -> { - logger - .info( - c.getOid() + " - " + c.getCreditName() + " - " + - c.getName() + " - " + c.getSurname() + " - " + - c.getRole() + " - " + c.getSequence()); - }); + } + + private static boolean simpleMatch(String name, String searchValue) { + if (searchValue == null) { + return false; + } + return normalize(name).contains(normalize(searchValue)); } private static Double bestMatch(String authorSurname, String authorName, String contributor) { - logger.debug(authorSurname + " " + authorName + " vs " + contributor); String[] contributorSplitted = contributor.split(" "); if (contributorSplitted.length == 0) { return 0.0; @@ -90,10 +84,6 @@ public class AuthorMatcher { } contributorSurname = joiner.toString(); } - logger - .debug( - "contributorName: " + contributorName + - " contributorSurname: " + contributorSurname); String authorNameNrm = normalize(authorName); String authorSurnameNrm = normalize(authorSurname); String contributorNameNrm = normalize(contributorName); @@ -108,8 +98,6 @@ public class AuthorMatcher { private static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - logger - .debug(nameA + ", " + surnameA + " <> " + nameB + ", " + surnameB + " score: " + Double.toString(score)); return score; } @@ -118,6 +106,9 @@ public class AuthorMatcher { } private static String normalize(final String s) { + 
if (s == null) { + return new String(""); + } return nfd(s) .toLowerCase() // do not compact the regexes in a single expression, would cause StackOverflowError @@ -142,7 +133,6 @@ public class AuthorMatcher { private static void updateAuthorsSimpleMatch(List contributors, AuthorData author) { contributors.forEach(c -> { if (c.isSimpleMatch()) { - logger.info("simple match on : " + c.getCreditName()); c.setName(author.getName()); c.setSurname(author.getSurname()); c.setOid(author.getOid()); @@ -152,21 +142,10 @@ public class AuthorMatcher { } private static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { - logger.info("inside updateAuthorsSimilarityMatch ..."); - contributors.forEach(c -> { - logger - .info( - c.getOid() + " - " + c.getCreditName() + " - " + - c.getName() + " - " + c.getSurname() + " - " + - c.getRole() + " - " + c.getSequence() + " - best: " + c.isBestMatch() + " - simpe: " - + c.isSimpleMatch()); - }); - contributors .stream() .filter(c -> c.isBestMatch()) .forEach(c -> { - logger.info("similarity match on : " + c.getCreditName()); c.setName(author.getName()); c.setSurname(author.getSurname()); c.setOid(author.getOid()); @@ -184,7 +163,6 @@ public class AuthorMatcher { c.getSequence().equals("additional"))) .count() > 0) { seqFound = true; - logger.info("sequence data found"); } if (!seqFound) { List seqIds = Arrays.asList(0); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index 6e5771547..ae96a322f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -41,7 +41,6 @@ public class XMLRecordParserNoDoi { public static WorkDataNoDoi VTDParseWorkData(byte[] bytes) throws VtdException, 
EncodingException, EOFException, EntityException, ParseException, XPathParseException, NavException, XPathEvalException { - logger.info("parsing xml ..."); final VTDGen vg = new VTDGen(); vg.setDoc(bytes); vg.parse(true); @@ -191,6 +190,9 @@ public class XMLRecordParserNoDoi { nameIndex++; } } + if (contributors.size() == 0) { + return contributors; + } int sequenceIndex = 0; ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-sequence"); diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml index f2d51e260..3068562d0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml @@ -8,15 +8,24 @@ true - oozie.launcher.mapreduce.map.java.opts - -Xmx4g + oozie.launcher.mapreduce.map.java.opts + -Xmx4g jobTracker - hadoop-rm3.garr-pa1.d4science.org:8032 + yarnRM nameNode - hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 + hdfs://nameservice1 + + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml index 33fbdf875..df5e0e76f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml @@ -71,10 +71,9 @@ the shell command that downloads and puts 
to hdfs orcid activity file X - + - - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -133,6 +132,7 @@ -n${nameNode} -fORCID_2019_activites_0.tar.gz -owno_doi_works/works_0.seq + -oewno_doi_enriched_works/ @@ -169,6 +169,7 @@ -n${nameNode} -fORCID_2019_activites_1.tar.gz -owno_doi_works/works_1.seq + -oewno_doi_enriched_works/ @@ -205,6 +206,7 @@ -n${nameNode} -fORCID_2019_activites_2.tar.gz -owno_doi_works/works_2.seq + -oewno_doi_enriched_works/ @@ -241,6 +243,7 @@ -n${nameNode} -fORCID_2019_activites_3.tar.gz -owno_doi_works/works_3.seq + -oewno_doi_enriched_works/ @@ -277,6 +280,7 @@ -n${nameNode} -fORCID_2019_activites_4.tar.gz -owno_doi_works/works_4.seq + -oewno_doi_enriched_works/ @@ -313,6 +317,7 @@ -n${nameNode} -fORCID_2019_activites_5.tar.gz -owno_doi_works/works_5.seq + -oewno_doi_enriched_works/ @@ -349,6 +354,7 @@ -n${nameNode} -fORCID_2019_activites_6.tar.gz -owno_doi_works/works_6.seq + -oewno_doi_enriched_works/ @@ -386,6 +392,7 @@ -n${nameNode} -fORCID_2019_activites_7.tar.gz -owno_doi_works/works_7.seq + -oewno_doi_enriched_works/ @@ -422,6 +429,7 @@ -n${nameNode} -fORCID_2019_activites_8.tar.gz -owno_doi_works/works_8.seq + -oewno_doi_enriched_works/ @@ -458,6 +466,7 @@ -n${nameNode} -fORCID_2019_activites_9.tar.gz -owno_doi_works/works_9.seq + -oewno_doi_enriched_works/ @@ -494,11 +503,12 @@ -n${nameNode} -fORCID_2019_activites_X.tar.gz -owno_doi_works/works_X.seq + -oewno_doi_enriched_works/ - + @@ -509,12 +519,14 @@ cluster Gen_Enriched_Orcid_Works eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks - dhp-doiboost-1.2.2-SNAPSHOT.jar + dhp-doiboost-1.2.4-SNAPSHOT.jar --num-executors 10 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} -w${workingPath}/ + -n${nameNode} + -f- -owno_doi_works/ - -oewno_doi_enriched_works/ + 
-oewno_doi_enriched_works/output From 5525f57ec8f9ef07d74ab30c54ab8d39e924d413 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Wed, 1 Jul 2020 18:36:14 +0200 Subject: [PATCH 005/108] converter from orcid work json to oaf --- .../orcidnodoi/oaf/OrcidWorkToOAF.java | 420 +++++++++++++++++ .../orcidnodoi/proto/ProtoWriter.java | 427 ------------------ .../orcidnodoi/util/DumpToActionsUtility.java | 107 +++++ .../doiboost/orcidnodoi/util/Pair.java | 30 ++ .../orcidnodoi/mappings/typologies.json | 41 ++ 5 files changed, 598 insertions(+), 427 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java new file mode 100644 index 000000000..673abb407 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java @@ -0,0 +1,420 @@ + +package eu.dnetlib.doiboost.orcidnodoi.oaf; + +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks; +import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; +import 
eu.dnetlib.doiboost.orcidnodoi.util.Pair; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.stream.Collectors; + +import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*; + +public class OrcidWorkToOAF { + + static Logger logger = LoggerFactory.getLogger(OrcidWorkToOAF.class); + + public static final String ORCID = "ORCID"; + public final static String orcidPREFIX = "orcid_______"; + public static final String OPENAIRE_PREFIX = "openaire____"; + public static final String SEPARATOR = "::"; + + private static Map> datasources = new HashMap>() { + + { + put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); + + } + }; + + // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname + private static Map> externalIds = new HashMap>() { + + { + put("ark".toLowerCase(), new Pair<>("ark", "ark")); + put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); + put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); + put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); + put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); + put("urn".toLowerCase(), new Pair<>("urn", "urn")); + } + }; + + static Map> typologiesMapping; + + static { + try { + final String tt = IOUtils.toString(OrcidWorkToOAF.class.getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")); + typologiesMapping = new Gson().fromJson(tt, Map.class); + } catch (final Exception e) { + logger.error("loading typologies", e); + } + } + + public static final String PID_TYPES = "dnet:pid_types"; + + public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement, final String setName) { + + if (!isValid(rootElement/*, context*/)) { return null; } + + Publication publication = new Publication(); + + final DataInfo dataInfo = new DataInfo(); + 
dataInfo.setDeletedbyinference(false); + dataInfo.setInferred(false); + dataInfo.setTrust("0.9"); + dataInfo.setProvenanceaction( + mapQualifier( + "sysimport:actionset:orcidworks-no-doi", + "sysimport:actionset:orcidworks-no-doi", + "dnet:provenanceActions", + "dnet:provenanceActions")); + publication.setDataInfo(dataInfo); + + publication.setLastupdatetimestamp(new Date().getTime()); + + publication.setDateofcollection("2019-10-22"); + publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); + + // Adding external ids + externalIds.keySet().stream() + .forEach(jsonExtId -> { + final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); + final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); + final String extId = getStringValue(rootElement, jsonExtId); + if (StringUtils.isNotBlank(extId)) { + publication.getExternalReference().add( + convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); + } + }); + + // Adding source +// final String source = getStringValue(rootElement, "source"); +// if (StringUtils.isNotBlank(source)) { +// metadata.addSource(StringField.newBuilder().setValue(source).build()); +// } + + // Adding titles + final List titles = createRepeatedField(rootElement, "titles"); + if (titles==null || titles.isEmpty()) { +// context.incrementCounter("filtered", "title_not_found", 1); + return null; + } + Qualifier q = mapQualifier("main title","main title","dnet:dataCite_title","dnet:dataCite_title"); + publication.setTitle( + titles + .stream() + .map(t -> { + return mapStructuredProperty(t, q, null); + }) + .collect(Collectors.toList())); + // Adding identifier + final String id = getStringValue(rootElement, "id"); + String sourceId = null; + if (id != null) { + publication.setOriginalId(Arrays.asList(id)); + sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase())); + } else { + String mergedTitle = 
titles.stream().map(Object::toString).collect(Collectors.joining(",")); + sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase())); + } + publication.setId(sourceId); + + // Adding relevant date + settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + + // Adding collectedfrom + publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); + + // Adding type + final String type = getStringValue(rootElement, "type"); + String cobjValue = ""; + if (StringUtils.isNotBlank(type)) { + publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource")); + + final String typeValue = typologiesMapping.get(type).get("value"); + cobjValue = typologiesMapping.get(type).get("cobj"); + final Instance instance = new Instance(); + + // Adding hostedby + instance.setHostedby(createHostedBy()); + + // Adding url + final List urls = createRepeatedField(rootElement, "urls"); + if (urls!=null && !urls.isEmpty()) { + instance.setUrl(urls); + } + + final String pubDate = getPublicationDate(rootElement, "publication_date"); + if (StringUtils.isNotBlank(pubDate)) { + instance.setDateofacceptance(mapStringField(pubDate, null)); + } + + instance.setCollectedfrom(createCollectedFrom()); + + // Adding accessright + instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes")); + + // Adding type + instance.setInstancetype(mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource")); + + publication.setInstance(Arrays.asList(instance)); + } else { +// context.incrementCounter("filtered", "type_not_found", 1); + return null; + } + + // Adding authors + final List authors = createAuthors(rootElement); + if (authors != null && authors.size() > 0) { + publication.setAuthor(authors); + } else { +// context.incrementCounter("filtered", "author_not_found", 1); + return null; + } + String classValue = 
getDefaultResulttype(cobjValue); + publication.setResulttype(mapQualifier(classValue, classValue,"dnet:result_typologies", "dnet:result_typologies")); + return publication; + } + + public static List createAuthors(final JsonObject root) { + + final String authorsJSONFieldName = "authors"; + + if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) { + + final List authors = new ArrayList<>(); + final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); + int firstCounter = 0; + int defaultCounter = 0; + int rank = 1; + int currentRank = 0; + + for (final JsonElement item : jsonAuthors) { + final JsonObject jsonAuthor = item.getAsJsonObject(); + final Author author = new Author(); + if (item.isJsonObject()) { + final String surname = getStringValue(jsonAuthor, "surname"); + final String name = getStringValue(jsonAuthor, "name"); + final String oid = getStringValue(jsonAuthor, "oid"); + final String seq = getStringValue(jsonAuthor, "seq"); + if (StringUtils.isNotBlank(seq)) { + if (seq.equals("first")) { + firstCounter += 1; + rank = firstCounter; + + } else if (seq.equals("additional")) { + rank = currentRank + 1; + } else { + defaultCounter += 1; + rank = defaultCounter; + } + } + + if (StringUtils.isNotBlank(oid)) { + author.setPid(Arrays.asList(mapAuthorId(oid))); + author.setFullname(name + " " + surname); + if (StringUtils.isNotBlank(name)) { + author.setName(name); + } + if (StringUtils.isNotBlank(surname)) { + author.setSurname(surname); + } + } else { + String fullname = ""; + if (StringUtils.isNotBlank(name)) { + fullname = name; + } else { + if (StringUtils.isNotBlank(surname)) { + fullname = surname; + } + } + PacePerson p = new PacePerson(fullname, false); + if (p.isAccurate()) { + author.setName(p.getNormalisedFirstName()); + author.setSurname(p.getNormalisedSurname()); + author.setFullname(p.getNormalisedFullname()); + } + else { + author.setFullname(fullname); + } + } + } + author.setRank(rank); + 
authors.add(author); + currentRank = rank; + } + return authors; + + } + return null; + } + + private static List createRepeatedField(final JsonObject rootElement, final String fieldName) { + if (!rootElement.has(fieldName)) { return null; } + if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; } + if (rootElement.get(fieldName).isJsonArray()) { + if (!isValidJsonArray(rootElement, fieldName)) { return null; } + return getArrayValues(rootElement, fieldName); + } else { + String field = getStringValue(rootElement, fieldName); + return Arrays.asList(cleanField(field)); + } + } + + private static String cleanField(String value) { + if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') { + value = value.substring(1, value.length() - 1); + } + return value; + } + + private static void settingRelevantDate(final JsonObject rootElement, + final Publication publication, + final String jsonKey, + final String dictionaryKey, + final boolean addToDateOfAcceptance) { + + final String pubDate = getPublicationDate(rootElement, "publication_date"); + if (StringUtils.isNotBlank(pubDate)) { + if (addToDateOfAcceptance) { + publication.setDateofacceptance(mapStringField(pubDate, null)); + } + Qualifier q = mapQualifier(dictionaryKey,dictionaryKey,"dnet:dataCite_date","dnet:dataCite_date"); + publication.setRelevantdate( + Arrays.asList(pubDate) + .stream() + .map(r -> { + return mapStructuredProperty(r, q, null); + }) + .collect(Collectors.toList())); + } + } + + private static String getPublicationDate(final JsonObject rootElement, + final String jsonKey) { + + final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); + if (pubDateJson == null) { return null; } + final String year = getStringValue(pubDateJson, "year"); + final String month = getStringValue(pubDateJson, "month"); + final String day = getStringValue(pubDateJson, "day"); + + if (StringUtils.isBlank(year)) { return 
null; } + String pubDate = "".concat(year); + if (StringUtils.isNotBlank(month)) { + pubDate = pubDate.concat("-" + month); + if (StringUtils.isNotBlank(day)) { + pubDate = pubDate.concat("-" + day); + } else { + pubDate += "-01"; + } + } else { + pubDate += "-01-01"; + } + if (isValidDate(pubDate)) { return pubDate; } + return null; + } + + protected static boolean isValid(final JsonObject rootElement/*, final Reporter context*/) { + + final String type = getStringValue(rootElement, "type"); + if (!typologiesMapping.containsKey(type)) { +// context.incrementCounter("filtered", "unknowntype_" + type, 1); + return false; + } + + if (!isValidJsonArray(rootElement, "titles")) { +// context.incrementCounter("filtered", "invalid_title", 1); + return false; + } + return true; + } + + private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { + if (!rootElement.has(fieldName)) { return false; } + final JsonElement jsonElement = rootElement.get(fieldName); + if (jsonElement.isJsonNull()) { return false; } + if (jsonElement.isJsonArray()) { + final JsonArray jsonArray = jsonElement.getAsJsonArray(); + if (jsonArray.isJsonNull()) { return false; } + if (jsonArray.get(0).isJsonNull()) { return false; } + } + return true; + } + + private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { + final Qualifier qualifier = new Qualifier(); + qualifier.setClassid(classId); + qualifier.setClassname(className); + qualifier.setSchemeid(schemeId); + qualifier.setSchemename(schemeName); + return qualifier; + } + + private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) { + ExternalReference ex = new ExternalReference(); + ex.setRefidentifier(extId); + ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName )); + return ex; + } + + private static StructuredProperty mapStructuredProperty(String value, Qualifier 
qualifier, DataInfo dataInfo) { + if (value == null | StringUtils.isBlank(value)) { + return null; + } + + final StructuredProperty structuredProperty = new StructuredProperty(); + structuredProperty.setValue(value); + structuredProperty.setQualifier(qualifier); + structuredProperty.setDataInfo(dataInfo); + return structuredProperty; + } + + private static Field mapStringField(String value, DataInfo dataInfo) { + if (value == null || StringUtils.isBlank(value)) { + return null; + } + + final Field stringField = new Field<>(); + stringField.setValue(value); + stringField.setDataInfo(dataInfo); + return stringField; + } + + private static KeyValue createCollectedFrom() { + KeyValue cf = new KeyValue(); + cf.setValue(ORCID); + cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); + return cf; + } + + private static KeyValue createHostedBy() { + KeyValue hb = new KeyValue(); + hb.setValue("Unknown Repository"); + hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); + return hb; + } + + private static StructuredProperty mapAuthorId(String orcidId) { + final StructuredProperty sp = new StructuredProperty(); + sp.setValue(orcidId); + final Qualifier q = new Qualifier(); + q.setClassid("ORCID"); + q.setClassname("ORCID"); + sp.setQualifier(q); + return sp; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java deleted file mode 100644 index 01b172359..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/proto/ProtoWriter.java +++ /dev/null @@ -1,427 +0,0 @@ - -package eu.dnetlib.doiboost.orcidnodoi.proto; - -public class ProtoWriter { - -} -// -//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getArrayValues; -//import static 
eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getDefaultResulttype; -//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getQualifier; -//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getStringValue; -//import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.isValidDate; -// -//import java.io.IOException; -//import java.io.InputStream; -//import java.util.ArrayList; -//import java.util.HashMap; -//import java.util.List; -//import java.util.Map; -// -//import org.apache.commons.io.IOUtils; -//import org.apache.commons.lang3.StringUtils; -// -//import com.google.gson.Gson; -//import com.google.gson.JsonArray; -//import com.google.gson.JsonElement; -//import com.google.gson.JsonObject; -//import com.googlecode.protobuf.format.JsonFormat; -// -//import eu.dnetlib.actionmanager.actions.ActionFactory; -//import eu.dnetlib.actionmanager.actions.AtomicAction; -//import eu.dnetlib.actionmanager.common.Agent; -//import eu.dnetlib.data.mapreduce.hbase.Reporter; -//import eu.dnetlib.data.mapreduce.util.StreamUtils; -//import eu.dnetlib.data.proto.FieldTypeProtos; -//import eu.dnetlib.data.proto.FieldTypeProtos.Author; -//import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; -//import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; -//import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; -//import eu.dnetlib.data.proto.FieldTypeProtos.StringField; -//import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; -//import eu.dnetlib.data.proto.KindProtos; -//import eu.dnetlib.data.proto.OafProtos; -//import eu.dnetlib.data.proto.ResultProtos; -//import eu.dnetlib.data.proto.TypeProtos; -//import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; -//import eu.dnetlib.miscutils.collections.Pair; -//import eu.dnetlib.miscutils.datetime.DateUtils; -//import eu.dnetlib.pace.model.Person; -// -//public class ProtoWriter { -// -// public static final String ORCID = "ORCID"; 
-// public final static String orcidPREFIX = "orcid_______"; -// public static final String OPENAIRE_PREFIX = "openaire____"; -// public static final String SEPARATOR = "::"; -// -// private static Map> datasources = new HashMap>() { -// -// { -// put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); -// -// } -// }; -// -// // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname -// private static Map> externalIds = new HashMap>() { -// -// { -// put("ark".toLowerCase(), new Pair<>("ark", "ark")); -// put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); -// put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); -// put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); -// put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); -// put("urn".toLowerCase(), new Pair<>("urn", "urn")); -// } -// }; -// -// static Map> typologiesMapping; -// -// static { -// try { -// final InputStream is = OrcidToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies_orcid.json"); -// final String tt = IOUtils.toString(is); -// typologiesMapping = new Gson().fromJson(tt, Map.class); -// } catch (final IOException e) { -// e.printStackTrace(); -// } -// } -// -// public static final String PID_TYPES = "dnet:pid_types"; -// -// public static List generatePublicationActionsFromDump(final JsonObject rootElement, -// final ActionFactory factory, -// final String setName, -// final Agent agent, -// final Reporter context) { -// -// if (!isValid(rootElement, context)) { return null; } -// -// // Create OAF proto -// -// final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder(); -// -// oaf.setDataInfo( -// DataInfo.newBuilder() -// .setDeletedbyinference(false) -// .setInferred(false) -// .setTrust("0.9") -// .setProvenanceaction(getQualifier("sysimport:actionset:orcidworks-no-doi", "dnet:provenanceActions")) -// .build()); -// -// // Adding kind -// 
oaf.setKind(KindProtos.Kind.entity); -// -// oaf.setLastupdatetimestamp(DateUtils.now()); -// -// // creating result proto -// final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result); -// -// entity.setDateofcollection("2018-10-22"); -// entity.setDateoftransformation(DateUtils.now_ISO8601()); -// -// // Adding external ids -// StreamUtils.toStream(externalIds.keySet().iterator()) -// .forEach(jsonExtId -> { -// final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); -// final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); -// final String extId = getStringValue(rootElement, jsonExtId); -// if (StringUtils.isNotBlank(extId)) { -// entity.addPid(StructuredProperty.newBuilder() -// .setValue(extId) -// .setQualifier(Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid("dnet:pid_types") -// .setSchemename("dnet:pid_types").build()) -// .build()); -// } -// }); -// -// // Create result field -// final ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder(); -// -// // Create metadata proto -// final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder(); -// -// // Adding source -// final String source = getStringValue(rootElement, "source"); -// if (StringUtils.isNotBlank(source)) { -// metadata.addSource(StringField.newBuilder().setValue(source).build()); -// } -// -// // Adding title -// final String title = createRepeatedField(rootElement, "titles"); -// if (StringUtils.isBlank(title)) { -// context.incrementCounter("filtered", "title_not_found", 1); -// return null; -// } -// metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder() -// .setValue(title) -// .setQualifier(getQualifier("main title", "dnet:dataCite_title")) -// .build()); -// -// // Adding identifier -// final String id = getStringValue(rootElement, "id"); -// String sourceId = null; -// if (id != null) { -// 
entity.addOriginalId(id); -// sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, AbstractDNetXsltFunctions.md5(id)); -// } else { -// sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, AbstractDNetXsltFunctions.md5(title)); -// } -// entity.setId(sourceId); -// -// // Adding relevant date -// settingRelevantDate(rootElement, metadata, "publication_date", "issued", true); -// -// // Adding collectedfrom -// final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder() -// .setValue(ORCID) -// .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a") -// .build(); -// entity.addCollectedfrom(collectedFrom); -// -// // Adding type -// final String type = getStringValue(rootElement, "type"); -// String cobjValue = ""; -// if (StringUtils.isNotBlank(type)) { -// -// metadata.setResourcetype(FieldTypeProtos.Qualifier.newBuilder() -// .setClassid(type) -// .setClassname(type) -// .setSchemeid("dnet:dataCite_resource") -// .setSchemename("dnet:dataCite_resource") -// .build()); -// -// final String typeValue = typologiesMapping.get(type).get("value"); -// cobjValue = typologiesMapping.get(type).get("cobj"); -// final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder(); -// -// // Adding hostedby -// instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder() -// .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c") -// .setValue("Unknown Repository") -// .build()); -// -// // Adding url -// final String url = createRepeatedField(rootElement, "urls"); -// if (StringUtils.isNotBlank(url)) { -// instance.addUrl(url); -// } -// -// final String pubDate = getPublicationDate(rootElement, "publication_date"); -// if (StringUtils.isNotBlank(pubDate)) { -// instance.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(pubDate).build()); -// } -// -// instance.setCollectedfrom(collectedFrom); -// -// // Adding accessright -// 
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder() -// .setClassid("UNKNOWN") -// .setClassname("UNKNOWN") -// .setSchemeid("dnet:access_modes") -// .setSchemename("dnet:access_modes") -// .build()); -// -// // Adding type -// instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder() -// .setClassid(cobjValue) -// .setClassname(typeValue) -// .setSchemeid("dnet:publication_resource") -// .setSchemename("dnet:publication_resource") -// .build()); -// -// result.addInstance(instance); -// } else { -// context.incrementCounter("filtered", "type_not_found", 1); -// return null; -// } -// -// // Adding authors -// final List authors = createAuthors(rootElement); -// if (authors != null && authors.size() > 0) { -// metadata.addAllAuthor(authors); -// } else { -// context.incrementCounter("filtered", "author_not_found", 1); -// return null; -// } -// -// metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies")); -// result.setMetadata(metadata.build()); -// entity.setResult(result.build()); -// oaf.setEntity(entity.build()); -// -// final List actionList = new ArrayList<>(); -// -// actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray())); -// -//// System.out.println(JsonFormat.printToString(oaf.build())); -// return actionList; -// -// } -// -// public static List createAuthors(final JsonObject root) { -// -// final String authorsJSONFieldName = "authors"; -// -// if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) { -// -// final List authors = new ArrayList<>(); -// final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); -// int firstCounter = 0; -// int defaultCounter = 0; -// int rank = 1; -// int currentRank = 0; -// -// for (final JsonElement item : jsonAuthors) { -// final JsonObject author = item.getAsJsonObject(); -// final Author.Builder result = Author.newBuilder(); -// if 
(item.isJsonObject()) { -// final String surname = getStringValue(author, "surname"); -// final String name = getStringValue(author, "name"); -// final String oid = getStringValue(author, "oid"); -// final String seq = getStringValue(author, "seq"); -// if (StringUtils.isNotBlank(seq)) { -// if (seq.equals("first")) { -// firstCounter += 1; -// rank = firstCounter; -// -// } else if (seq.equals("additional")) { -// rank = currentRank + 1; -// } else { -// defaultCounter += 1; -// rank = defaultCounter; -// } -// } -// -// if (StringUtils.isNotBlank(oid)) { -// result.addPid(KeyValue.newBuilder() -// .setValue(oid) -// .setKey("ORCID") -// .build()); -// result.setFullname(name + " " + surname); -// if (StringUtils.isNotBlank(name)) { -// result.setName(name); -// } -// if (StringUtils.isNotBlank(surname)) { -// result.setSurname(surname); -// } -// } else { -// String fullname = ""; -// if (StringUtils.isNotBlank(name)) { -// fullname = name; -// } else { -// if (StringUtils.isNotBlank(surname)) { -// fullname = surname; -// } -// } -// Person p = new Person(fullname, false); -// if (p.isAccurate()) { -// result.setName(p.getNormalisedFirstName()); -// result.setSurname(p.getNormalisedSurname()); -// result.setFullname(p.getNormalisedFullname()); -// } -// else { -// result.setFullname(fullname); -// } -// } -// } -// result.setRank(rank); -// authors.add(result.build()); -// currentRank = rank; -// } -// return authors; -// -// } -// return null; -// } -// -// private static String createRepeatedField(final JsonObject rootElement, final String fieldName) { -// String field = ""; -// if (!rootElement.has(fieldName)) { return null; } -// if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; } -// if (rootElement.get(fieldName).isJsonArray()) { -// if (!isValidJsonArray(rootElement, fieldName)) { return null; } -// final StringBuilder ttl = new StringBuilder(); -// getArrayValues(rootElement, fieldName).forEach(ttl::append); -// 
field = ttl.toString(); -// } else { -// field = getStringValue(rootElement, fieldName); -// } -// -// if (field != null && !field.isEmpty() && field.charAt(0) == '"' && field.charAt(field.length() - 1) == '"') { -// field = field.substring(1, field.length() - 1); -// } -// return field; -// } -// -// private static void settingRelevantDate(final JsonObject rootElement, -// final ResultProtos.Result.Metadata.Builder metadata, -// final String jsonKey, -// final String dictionaryKey, -// final boolean addToDateOfAcceptance) { -// -// final String pubDate = getPublicationDate(rootElement, "publication_date"); -// if (StringUtils.isNotBlank(pubDate)) { -// if (addToDateOfAcceptance) { -// metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(pubDate).build()); -// } -// metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder() -// .setValue(pubDate) -// .setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date")) -// .build()); -// } -// } -// -// private static String getPublicationDate(final JsonObject rootElement, -// final String jsonKey) { -// -// final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); -// if (pubDateJson == null) { return null; } -// final String year = getStringValue(pubDateJson, "year"); -// final String month = getStringValue(pubDateJson, "month"); -// final String day = getStringValue(pubDateJson, "day"); -// -// if (StringUtils.isBlank(year)) { return null; } -// String pubDate = "".concat(year); -// if (StringUtils.isNotBlank(month)) { -// pubDate = pubDate.concat("-" + month); -// if (StringUtils.isNotBlank(day)) { -// pubDate = pubDate.concat("-" + day); -// } else { -// pubDate += "-01"; -// } -// } else { -// pubDate += "-01-01"; -// } -// if (isValidDate(pubDate)) { return pubDate; } -// return null; -// } -// -// protected static boolean isValid(final JsonObject rootElement, final Reporter context) { -// -// final String type = getStringValue(rootElement, "type"); -// if 
(!typologiesMapping.containsKey(type)) { -// context.incrementCounter("filtered", "unknowntype_" + type, 1); -// return false; -// } -// -// if (!isValidJsonArray(rootElement, "titles")) { -// context.incrementCounter("filtered", "invalid_title", 1); -// return false; -// } -// return true; -// } -// -// private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { -// if (!rootElement.has(fieldName)) { return false; } -// final JsonElement jsonElement = rootElement.get(fieldName); -// if (jsonElement.isJsonNull()) { return false; } -// if (jsonElement.isJsonArray()) { -// final JsonArray jsonArray = jsonElement.getAsJsonArray(); -// if (jsonArray.isJsonNull()) { return false; } -// if (jsonArray.get(0).isJsonNull()) { return false; } -// } -// return true; -// } -//} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java new file mode 100644 index 000000000..c460f6299 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java @@ -0,0 +1,107 @@ +package eu.dnetlib.doiboost.orcidnodoi.util; + +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import org.apache.commons.lang3.StringUtils; + +import java.text.SimpleDateFormat; +import java.util.*; + +public class DumpToActionsUtility { + + private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); + + public static String getStringValue(final JsonObject root, final String key) { + if (root.has(key) && !root.get(key).isJsonNull()) + return root.get(key).getAsString(); + return null; + } + + public static List getArrayValues(final JsonObject root, final String key) { + if (root.has(key) && root.get(key).isJsonArray()) { + final JsonArray asJsonArray = root.get(key).getAsJsonArray(); + 
final List result = new ArrayList<>(); + + + asJsonArray.forEach(it -> { + if (StringUtils.isNotBlank(it.getAsString())) { + result.add(it.getAsString()); + } + }); + return result; + } + return new ArrayList<>(); + } + public static List getArrayObjects(final JsonObject root, final String key) { + if (root.has(key) && root.get(key).isJsonArray()) { + final JsonArray asJsonArray = root.get(key).getAsJsonArray(); + final List result = new ArrayList<>(); + asJsonArray.forEach(it -> { + if (it.getAsJsonObject() != null) { + result.add(it.getAsJsonObject()); + } + }); + return result; + } + return new ArrayList<>(); + } + + public static boolean isValidDate(final String date) { + return date.matches("\\d{4}-\\d{2}-\\d{2}"); + } + + public static String now_ISO8601() { // NOPMD + String result; + synchronized (ISO8601FORMAT) { + result = ISO8601FORMAT.format(new Date()); + } + //convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00 + //- note the added colon for the Timezone + return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2); + } + + public static String getDefaultResulttype(final String cobjcategory) { + switch (cobjcategory) { + case "0029": + return "software"; + case "0021": + case "0024": + case "0025": + case "0030": + return "dataset"; + case "0000": + case "0010": + case "0018": + case "0020": + case "0022": + case "0023": + case "0026": + case "0027": + case "0028": + case "0037": + return "other"; + case "0001": + case "0002": + case "0004": + case "0005": + case "0006": + case "0007": + case "0008": + case "0009": + case "0011": + case "0012": + case "0013": + case "0014": + case "0015": + case "0016": + case "0017": + case "0019": + case "0031": + case "0032": + return "publication"; + default: + return "publication"; + } + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java new file mode 100644 index 000000000..58c09af60 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java @@ -0,0 +1,30 @@ +package eu.dnetlib.doiboost.orcidnodoi.util; + +public class Pair { + + private K k; + + private V v; + + public Pair(K k, V v) { + this.k = k; + this.v = v; + } + + public K getKey() { + return k; + } + + public V getValue() { + return v; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof Pair) { + Pair tmp = (Pair) obj; + return k.equals(tmp.getKey()) && v.equals(tmp.getValue()); + } else return false; + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json new file mode 100644 index 000000000..cb696f279 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json @@ -0,0 +1,41 @@ +{ + "reference-entry": {"cobj":"0013", "value": "Part of book or chapter of book"}, + "report": {"cobj":"0017", "value": "Report"}, + "dataset": {"cobj":"0021", "value": "Dataset"}, + "journal-article": {"cobj":"0001", "value": "Article"}, + "reference-book": {"cobj":"0002", "value": "Book"}, + "other": {"cobj":"0020", "value": "Other ORP type"}, + "proceedings-article": {"cobj":"0004", "value": "Conference object"}, + "standard": {"cobj":"0038", "value": "Other literature type"}, + "book-part": {"cobj":"0002", "value": "Book"}, + "monograph": {"cobj":"0002", "value": "Book"}, + "report-series": {"cobj":"0017", "value": "Report"}, + "book": {"cobj":"0002", "value": "Book"}, + "book-chapter": {"cobj":"0013", "value": "Part of book or chapter of book"}, + "peer-review": {"cobj":"0015", "value": "Review"}, + "book-section": {"cobj":"0013", "value": "Part of book or 
chapter of book"}, + "book-review": {"cobj":"0015", "value": "Review"}, + "conference-abstract": {"cobj":"0004", "value": "Conference object"}, + "conference-paper": {"cobj":"0004", "value": "Conference object"}, + "conference-poster": {"cobj":"0004", "value": "Conference object"}, + "data-set": {"cobj":"0021", "value": "Dataset"}, + "dictionary-entry": {"cobj":"0038", "value": "Other literature type"}, + "disclosure": {"cobj":"0038", "value": "Other literature type"}, + "dissertation": {"cobj":"0006", "value": "Doctoral thesis"}, + "edited-book": {"cobj":"0002", "value": "Book"}, + "encyclopedia-entry": {"cobj":"0038", "value": "Other literature type"}, + "lecture-speech": {"cobj":"0010", "value": "Lecture"}, + "license": {"cobj":"0038", "value": "Other literature type"}, + "magazine-article": {"cobj":"0005", "value": "Contribution for newspaper or weekly magazine"}, + "manual": {"cobj":"0038", "value": "Other literature type"}, + "newsletter-article": {"cobj":"0012", "value": "Newsletter"}, + "newspaper-article": {"cobj":"0005", "value": "Contribution for newspaper or weekly magazine"}, + "patent": {"cobj":"0019", "value": "Patent"}, + "research-technique": {"cobj":"0020", "value": "Other ORP type"}, + "research-tool": {"cobj":"0020", "value": "Other ORP type"}, + "standards-and-policy": {"cobj":"0038", "value": "Other literature type"}, + "supervised-student-publication": {"cobj":"0001", "value": "Article"}, + "technical-standard": {"cobj":"0038", "value": "Other literature type"}, + "website": {"cobj":"0020", "value": "Other ORP type"}, + "working-paper": {"cobj":"0014", "value": "Research"} +} \ No newline at end of file From 1729cc5cf320c32cdafa2523d884d965ccefdc98 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 2 Jul 2020 18:46:20 +0200 Subject: [PATCH 006/108] publication conversion from json to oaf test --- .../orcidnodoi/oaf/OrcidWorkToOAF.java | 420 ---------------- .../orcidnodoi/oaf/PublicationToOaf.java | 456 ++++++++++++++++++ 
.../orcidnodoi/util/DumpToActionsUtility.java | 184 +++---- .../doiboost/orcidnodoi/util/Pair.java | 40 +- .../doiboost/orcid/OrcidClientTest.java | 2 +- .../orcidnodoi/PublicationToOafTest.java | 76 +++ .../orcidnodoi/xml/OrcidNoDoiTest.java | 3 +- .../doiboost/orcidnodoi/publication.json | 1 + 8 files changed, 650 insertions(+), 532 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java deleted file mode 100644 index 673abb407..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/OrcidWorkToOAF.java +++ /dev/null @@ -1,420 +0,0 @@ - -package eu.dnetlib.doiboost.orcidnodoi.oaf; - -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import eu.dnetlib.dhp.common.PacePerson; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks; -import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; -import eu.dnetlib.doiboost.orcidnodoi.util.Pair; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; -import java.util.stream.Collectors; - -import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*; - -public class 
OrcidWorkToOAF { - - static Logger logger = LoggerFactory.getLogger(OrcidWorkToOAF.class); - - public static final String ORCID = "ORCID"; - public final static String orcidPREFIX = "orcid_______"; - public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; - - private static Map> datasources = new HashMap>() { - - { - put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); - - } - }; - - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname - private static Map> externalIds = new HashMap>() { - - { - put("ark".toLowerCase(), new Pair<>("ark", "ark")); - put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); - put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); - put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); - put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); - put("urn".toLowerCase(), new Pair<>("urn", "urn")); - } - }; - - static Map> typologiesMapping; - - static { - try { - final String tt = IOUtils.toString(OrcidWorkToOAF.class.getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")); - typologiesMapping = new Gson().fromJson(tt, Map.class); - } catch (final Exception e) { - logger.error("loading typologies", e); - } - } - - public static final String PID_TYPES = "dnet:pid_types"; - - public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement, final String setName) { - - if (!isValid(rootElement/*, context*/)) { return null; } - - Publication publication = new Publication(); - - final DataInfo dataInfo = new DataInfo(); - dataInfo.setDeletedbyinference(false); - dataInfo.setInferred(false); - dataInfo.setTrust("0.9"); - dataInfo.setProvenanceaction( - mapQualifier( - "sysimport:actionset:orcidworks-no-doi", - "sysimport:actionset:orcidworks-no-doi", - "dnet:provenanceActions", - "dnet:provenanceActions")); - publication.setDataInfo(dataInfo); - - 
publication.setLastupdatetimestamp(new Date().getTime()); - - publication.setDateofcollection("2019-10-22"); - publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); - - // Adding external ids - externalIds.keySet().stream() - .forEach(jsonExtId -> { - final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); - final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); - final String extId = getStringValue(rootElement, jsonExtId); - if (StringUtils.isNotBlank(extId)) { - publication.getExternalReference().add( - convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); - } - }); - - // Adding source -// final String source = getStringValue(rootElement, "source"); -// if (StringUtils.isNotBlank(source)) { -// metadata.addSource(StringField.newBuilder().setValue(source).build()); -// } - - // Adding titles - final List titles = createRepeatedField(rootElement, "titles"); - if (titles==null || titles.isEmpty()) { -// context.incrementCounter("filtered", "title_not_found", 1); - return null; - } - Qualifier q = mapQualifier("main title","main title","dnet:dataCite_title","dnet:dataCite_title"); - publication.setTitle( - titles - .stream() - .map(t -> { - return mapStructuredProperty(t, q, null); - }) - .collect(Collectors.toList())); - // Adding identifier - final String id = getStringValue(rootElement, "id"); - String sourceId = null; - if (id != null) { - publication.setOriginalId(Arrays.asList(id)); - sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase())); - } else { - String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(",")); - sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase())); - } - publication.setId(sourceId); - - // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); - - // Adding collectedfrom - 
publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); - - // Adding type - final String type = getStringValue(rootElement, "type"); - String cobjValue = ""; - if (StringUtils.isNotBlank(type)) { - publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource")); - - final String typeValue = typologiesMapping.get(type).get("value"); - cobjValue = typologiesMapping.get(type).get("cobj"); - final Instance instance = new Instance(); - - // Adding hostedby - instance.setHostedby(createHostedBy()); - - // Adding url - final List urls = createRepeatedField(rootElement, "urls"); - if (urls!=null && !urls.isEmpty()) { - instance.setUrl(urls); - } - - final String pubDate = getPublicationDate(rootElement, "publication_date"); - if (StringUtils.isNotBlank(pubDate)) { - instance.setDateofacceptance(mapStringField(pubDate, null)); - } - - instance.setCollectedfrom(createCollectedFrom()); - - // Adding accessright - instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes")); - - // Adding type - instance.setInstancetype(mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource")); - - publication.setInstance(Arrays.asList(instance)); - } else { -// context.incrementCounter("filtered", "type_not_found", 1); - return null; - } - - // Adding authors - final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { - publication.setAuthor(authors); - } else { -// context.incrementCounter("filtered", "author_not_found", 1); - return null; - } - String classValue = getDefaultResulttype(cobjValue); - publication.setResulttype(mapQualifier(classValue, classValue,"dnet:result_typologies", "dnet:result_typologies")); - return publication; - } - - public static List createAuthors(final JsonObject root) { - - final String authorsJSONFieldName = "authors"; - - if (root.has(authorsJSONFieldName) && 
root.get(authorsJSONFieldName).isJsonArray()) { - - final List authors = new ArrayList<>(); - final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); - int firstCounter = 0; - int defaultCounter = 0; - int rank = 1; - int currentRank = 0; - - for (final JsonElement item : jsonAuthors) { - final JsonObject jsonAuthor = item.getAsJsonObject(); - final Author author = new Author(); - if (item.isJsonObject()) { - final String surname = getStringValue(jsonAuthor, "surname"); - final String name = getStringValue(jsonAuthor, "name"); - final String oid = getStringValue(jsonAuthor, "oid"); - final String seq = getStringValue(jsonAuthor, "seq"); - if (StringUtils.isNotBlank(seq)) { - if (seq.equals("first")) { - firstCounter += 1; - rank = firstCounter; - - } else if (seq.equals("additional")) { - rank = currentRank + 1; - } else { - defaultCounter += 1; - rank = defaultCounter; - } - } - - if (StringUtils.isNotBlank(oid)) { - author.setPid(Arrays.asList(mapAuthorId(oid))); - author.setFullname(name + " " + surname); - if (StringUtils.isNotBlank(name)) { - author.setName(name); - } - if (StringUtils.isNotBlank(surname)) { - author.setSurname(surname); - } - } else { - String fullname = ""; - if (StringUtils.isNotBlank(name)) { - fullname = name; - } else { - if (StringUtils.isNotBlank(surname)) { - fullname = surname; - } - } - PacePerson p = new PacePerson(fullname, false); - if (p.isAccurate()) { - author.setName(p.getNormalisedFirstName()); - author.setSurname(p.getNormalisedSurname()); - author.setFullname(p.getNormalisedFullname()); - } - else { - author.setFullname(fullname); - } - } - } - author.setRank(rank); - authors.add(author); - currentRank = rank; - } - return authors; - - } - return null; - } - - private static List createRepeatedField(final JsonObject rootElement, final String fieldName) { - if (!rootElement.has(fieldName)) { return null; } - if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { return null; } - if 
(rootElement.get(fieldName).isJsonArray()) { - if (!isValidJsonArray(rootElement, fieldName)) { return null; } - return getArrayValues(rootElement, fieldName); - } else { - String field = getStringValue(rootElement, fieldName); - return Arrays.asList(cleanField(field)); - } - } - - private static String cleanField(String value) { - if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') { - value = value.substring(1, value.length() - 1); - } - return value; - } - - private static void settingRelevantDate(final JsonObject rootElement, - final Publication publication, - final String jsonKey, - final String dictionaryKey, - final boolean addToDateOfAcceptance) { - - final String pubDate = getPublicationDate(rootElement, "publication_date"); - if (StringUtils.isNotBlank(pubDate)) { - if (addToDateOfAcceptance) { - publication.setDateofacceptance(mapStringField(pubDate, null)); - } - Qualifier q = mapQualifier(dictionaryKey,dictionaryKey,"dnet:dataCite_date","dnet:dataCite_date"); - publication.setRelevantdate( - Arrays.asList(pubDate) - .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .collect(Collectors.toList())); - } - } - - private static String getPublicationDate(final JsonObject rootElement, - final String jsonKey) { - - final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); - if (pubDateJson == null) { return null; } - final String year = getStringValue(pubDateJson, "year"); - final String month = getStringValue(pubDateJson, "month"); - final String day = getStringValue(pubDateJson, "day"); - - if (StringUtils.isBlank(year)) { return null; } - String pubDate = "".concat(year); - if (StringUtils.isNotBlank(month)) { - pubDate = pubDate.concat("-" + month); - if (StringUtils.isNotBlank(day)) { - pubDate = pubDate.concat("-" + day); - } else { - pubDate += "-01"; - } - } else { - pubDate += "-01-01"; - } - if (isValidDate(pubDate)) { return pubDate; } - return null; - } - - 
protected static boolean isValid(final JsonObject rootElement/*, final Reporter context*/) { - - final String type = getStringValue(rootElement, "type"); - if (!typologiesMapping.containsKey(type)) { -// context.incrementCounter("filtered", "unknowntype_" + type, 1); - return false; - } - - if (!isValidJsonArray(rootElement, "titles")) { -// context.incrementCounter("filtered", "invalid_title", 1); - return false; - } - return true; - } - - private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { - if (!rootElement.has(fieldName)) { return false; } - final JsonElement jsonElement = rootElement.get(fieldName); - if (jsonElement.isJsonNull()) { return false; } - if (jsonElement.isJsonArray()) { - final JsonArray jsonArray = jsonElement.getAsJsonArray(); - if (jsonArray.isJsonNull()) { return false; } - if (jsonArray.get(0).isJsonNull()) { return false; } - } - return true; - } - - private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { - final Qualifier qualifier = new Qualifier(); - qualifier.setClassid(classId); - qualifier.setClassname(className); - qualifier.setSchemeid(schemeId); - qualifier.setSchemename(schemeName); - return qualifier; - } - - private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) { - ExternalReference ex = new ExternalReference(); - ex.setRefidentifier(extId); - ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName )); - return ex; - } - - private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | StringUtils.isBlank(value)) { - return null; - } - - final StructuredProperty structuredProperty = new StructuredProperty(); - structuredProperty.setValue(value); - structuredProperty.setQualifier(qualifier); - structuredProperty.setDataInfo(dataInfo); - return structuredProperty; - } - - 
private static Field mapStringField(String value, DataInfo dataInfo) { - if (value == null || StringUtils.isBlank(value)) { - return null; - } - - final Field stringField = new Field<>(); - stringField.setValue(value); - stringField.setDataInfo(dataInfo); - return stringField; - } - - private static KeyValue createCollectedFrom() { - KeyValue cf = new KeyValue(); - cf.setValue(ORCID); - cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); - return cf; - } - - private static KeyValue createHostedBy() { - KeyValue hb = new KeyValue(); - hb.setValue("Unknown Repository"); - hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); - return hb; - } - - private static StructuredProperty mapAuthorId(String orcidId) { - final StructuredProperty sp = new StructuredProperty(); - sp.setValue(orcidId); - final Qualifier q = new Qualifier(); - q.setClassid("ORCID"); - q.setClassname("ORCID"); - sp.setQualifier(q); - return sp; - } -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java new file mode 100644 index 000000000..dc03767ec --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -0,0 +1,456 @@ + +package eu.dnetlib.doiboost.orcidnodoi.oaf; + +import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*; + +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; + +import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.utils.DHPUtils; +import 
eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; +import eu.dnetlib.doiboost.orcidnodoi.util.Pair; + +public class PublicationToOaf { + + static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); + + public static final String ORCID = "ORCID"; + public final static String orcidPREFIX = "orcid_______"; + public static final String OPENAIRE_PREFIX = "openaire____"; + public static final String SEPARATOR = "::"; + + private static Map> datasources = new HashMap>() { + + { + put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); + + } + }; + + // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname + private static Map> externalIds = new HashMap>() { + + { + put("ark".toLowerCase(), new Pair<>("ark", "ark")); + put("arxiv".toLowerCase(), new Pair<>("arxiv", "arXiv")); + put("pmc".toLowerCase(), new Pair<>("pmc", "pmc")); + put("pmid".toLowerCase(), new Pair<>("pmid", "pmid")); + put("source-work-id".toLowerCase(), new Pair<>("orcidworkid", "orcidworkid")); + put("urn".toLowerCase(), new Pair<>("urn", "urn")); + } + }; + + static Map> typologiesMapping; + + static { + try { + final String tt = IOUtils + .toString( + PublicationToOaf.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")); + typologiesMapping = new Gson().fromJson(tt, Map.class); + } catch (final Exception e) { + logger.error("loading typologies", e); + } + } + + public static final String PID_TYPES = "dnet:pid_types"; + + public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement) { + + logger.debug("generatePublicationActionsFromDump ..."); + if (!isValid(rootElement/* , context */)) { + logger.error("publication not valid"); + return null; + } + + Publication publication = new Publication(); + + final DataInfo dataInfo = new DataInfo(); + dataInfo.setDeletedbyinference(false); + dataInfo.setInferred(false); + dataInfo.setTrust("0.9"); + dataInfo + 
.setProvenanceaction( + mapQualifier( + "sysimport:actionset:orcidworks-no-doi", + "sysimport:actionset:orcidworks-no-doi", + "dnet:provenanceActions", + "dnet:provenanceActions")); + publication.setDataInfo(dataInfo); + + publication.setLastupdatetimestamp(new Date().getTime()); + + publication.setDateofcollection("2019-10-22"); + publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); + + // Adding external ids + externalIds + .keySet() + .stream() + .forEach(jsonExtId -> { + final String classid = externalIds.get(jsonExtId.toLowerCase()).getValue(); + final String classname = externalIds.get(jsonExtId.toLowerCase()).getKey(); + final String extId = getStringValue(rootElement, jsonExtId); + if (StringUtils.isNotBlank(extId)) { + publication + .getExternalReference() + .add( + convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); + } + }); + + // Adding source + final String source = getStringValue(rootElement, "sourceName"); + if (StringUtils.isNotBlank(source)) { + publication.setSource(Arrays.asList(mapStringField(source, null))); + } + + // Adding titles + final List titles = createRepeatedField(rootElement, "titles"); + if (titles == null || titles.isEmpty()) { + logger.error("titles not found"); +// context.incrementCounter("filtered", "title_not_found", 1); + return null; + } + Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + publication + .setTitle( + titles + .stream() + .map(t -> { + return mapStructuredProperty(t, q, null); + }) + .collect(Collectors.toList())); + // Adding identifier + final String id = getStringValue(rootElement, "id"); + String sourceId = null; + if (id != null) { + publication.setOriginalId(Arrays.asList(id)); + sourceId = String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(id.toLowerCase())); + } else { + String mergedTitle = titles.stream().map(Object::toString).collect(Collectors.joining(",")); + sourceId = 
String.format("50|%s" + SEPARATOR + "%s", orcidPREFIX, DHPUtils.md5(mergedTitle.toLowerCase())); + } + publication.setId(sourceId); + + // Adding relevant date + settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + + // Adding collectedfrom + publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); + + // Adding type + final String type = getStringValue(rootElement, "type"); + String cobjValue = ""; + if (StringUtils.isNotBlank(type)) { + publication.setResourcetype(mapQualifier(type, type, "dnet:dataCite_resource", "dnet:dataCite_resource")); + + final String typeValue = typologiesMapping.get(type).get("value"); + cobjValue = typologiesMapping.get(type).get("cobj"); + final Instance instance = new Instance(); + + // Adding hostedby + instance.setHostedby(createHostedBy()); + + // Adding url + final List urls = createRepeatedField(rootElement, "urls"); + if (urls != null && !urls.isEmpty()) { + instance.setUrl(urls); + } + + final String pubDate = getPublicationDate(rootElement, "publication_date"); + if (StringUtils.isNotBlank(pubDate)) { + instance.setDateofacceptance(mapStringField(pubDate, null)); + } + + instance.setCollectedfrom(createCollectedFrom()); + + // Adding accessright + instance.setAccessright(mapQualifier("UNKNOWN", "UNKNOWN", "dnet:access_modes", "dnet:access_modes")); + + // Adding type + instance + .setInstancetype( + mapQualifier(cobjValue, typeValue, "dnet:publication_resource", "dnet:publication_resource")); + + publication.setInstance(Arrays.asList(instance)); + } else { + logger.error("type not found"); +// context.incrementCounter("filtered", "type_not_found", 1); + return null; + } + + // Adding authors + final List authors = createAuthors(rootElement); + if (authors != null && authors.size() > 0) { + publication.setAuthor(authors); + } else { + logger.error("authors not found"); +// context.incrementCounter("filtered", "author_not_found", 1); + return null; + } + String classValue = 
getDefaultResulttype(cobjValue); + publication + .setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies")); + return publication; + } + + public static List createAuthors(final JsonObject root) { + + final String authorsJSONFieldName = "contributors"; + + if (root.has(authorsJSONFieldName) && root.get(authorsJSONFieldName).isJsonArray()) { + + final List authors = new ArrayList<>(); + final JsonArray jsonAuthors = root.getAsJsonArray(authorsJSONFieldName); + int firstCounter = 0; + int defaultCounter = 0; + int rank = 1; + int currentRank = 0; + + for (final JsonElement item : jsonAuthors) { + final JsonObject jsonAuthor = item.getAsJsonObject(); + final Author author = new Author(); + if (item.isJsonObject()) { + final String creditname = getStringValue(jsonAuthor, "creditName"); + final String surname = getStringValue(jsonAuthor, "surname"); + final String name = getStringValue(jsonAuthor, "name"); + final String oid = getStringValue(jsonAuthor, "oid"); + final String seq = getStringValue(jsonAuthor, "sequence"); + if (StringUtils.isNotBlank(seq)) { + if (seq.equals("first")) { + firstCounter += 1; + rank = firstCounter; + + } else if (seq.equals("additional")) { + rank = currentRank + 1; + } else { + defaultCounter += 1; + rank = defaultCounter; + } + } + if (StringUtils.isNotBlank(oid)) { + author.setPid(Arrays.asList(mapAuthorId(oid))); + author.setFullname(name + " " + surname); + if (StringUtils.isNotBlank(name)) { + author.setName(name); + } + if (StringUtils.isNotBlank(surname)) { + author.setSurname(surname); + } + } else { + PacePerson p = new PacePerson(creditname, false); + if (p.isAccurate()) { + author.setName(p.getNormalisedFirstName()); + author.setSurname(p.getNormalisedSurname()); + author.setFullname(p.getNormalisedFullname()); + } else { + author.setFullname(creditname); + } + } + } + author.setRank(rank); + authors.add(author); + currentRank = rank; + } + return authors; + + } + return null; + } + 
+ private static List createRepeatedField(final JsonObject rootElement, final String fieldName) { + if (!rootElement.has(fieldName)) { + return null; + } + if (rootElement.has(fieldName) && rootElement.get(fieldName).isJsonNull()) { + return null; + } + if (rootElement.get(fieldName).isJsonArray()) { + if (!isValidJsonArray(rootElement, fieldName)) { + return null; + } + return getArrayValues(rootElement, fieldName); + } else { + String field = getStringValue(rootElement, fieldName); + return Arrays.asList(cleanField(field)); + } + } + + private static String cleanField(String value) { + if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') { + value = value.substring(1, value.length() - 1); + } + return value; + } + + private static void settingRelevantDate(final JsonObject rootElement, + final Publication publication, + final String jsonKey, + final String dictionaryKey, + final boolean addToDateOfAcceptance) { + + final String pubDate = getPublicationDate(rootElement, "publication_date"); + if (StringUtils.isNotBlank(pubDate)) { + if (addToDateOfAcceptance) { + publication.setDateofacceptance(mapStringField(pubDate, null)); + } + Qualifier q = mapQualifier(dictionaryKey, dictionaryKey, "dnet:dataCite_date", "dnet:dataCite_date"); + publication + .setRelevantdate( + Arrays + .asList(pubDate) + .stream() + .map(r -> { + return mapStructuredProperty(r, q, null); + }) + .collect(Collectors.toList())); + } + } + + private static String getPublicationDate(final JsonObject rootElement, + final String jsonKey) { + + final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); + if (pubDateJson == null) { + return null; + } + final String year = getStringValue(pubDateJson, "year"); + final String month = getStringValue(pubDateJson, "month"); + final String day = getStringValue(pubDateJson, "day"); + + if (StringUtils.isBlank(year)) { + return null; + } + String pubDate = "".concat(year); + if 
(StringUtils.isNotBlank(month)) { + pubDate = pubDate.concat("-" + month); + if (StringUtils.isNotBlank(day)) { + pubDate = pubDate.concat("-" + day); + } else { + pubDate += "-01"; + } + } else { + pubDate += "-01-01"; + } + if (isValidDate(pubDate)) { + return pubDate; + } + return null; + } + + protected static boolean isValid(final JsonObject rootElement/* , final Reporter context */) { + + final String type = getStringValue(rootElement, "type"); + if (!typologiesMapping.containsKey(type)) { + logger.error("unknowntype_" + type); +// context.incrementCounter("filtered", "unknowntype_" + type, 1); + return false; + } + + if (!isValidJsonArray(rootElement, "titles")) { + logger.error("invalid_title"); +// context.incrementCounter("filtered", "invalid_title", 1); + return false; + } + return true; + } + + private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { + if (!rootElement.has(fieldName)) { + return false; + } + final JsonElement jsonElement = rootElement.get(fieldName); + if (jsonElement.isJsonNull()) { + return false; + } + if (jsonElement.isJsonArray()) { + final JsonArray jsonArray = jsonElement.getAsJsonArray(); + if (jsonArray.isJsonNull()) { + return false; + } + if (jsonArray.get(0).isJsonNull()) { + return false; + } + } + return true; + } + + private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { + final Qualifier qualifier = new Qualifier(); + qualifier.setClassid(classId); + qualifier.setClassname(className); + qualifier.setSchemeid(schemeId); + qualifier.setSchemename(schemeName); + return qualifier; + } + + private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, + String schemeName) { + ExternalReference ex = new ExternalReference(); + ex.setRefidentifier(extId); + ex.setQualifier(mapQualifier(classId, className, schemeId, schemeName)); + return ex; + } + + private static StructuredProperty 
mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { + if (value == null | StringUtils.isBlank(value)) { + return null; + } + + final StructuredProperty structuredProperty = new StructuredProperty(); + structuredProperty.setValue(value); + structuredProperty.setQualifier(qualifier); + structuredProperty.setDataInfo(dataInfo); + return structuredProperty; + } + + private static Field mapStringField(String value, DataInfo dataInfo) { + if (value == null || StringUtils.isBlank(value)) { + return null; + } + + final Field stringField = new Field<>(); + stringField.setValue(value); + stringField.setDataInfo(dataInfo); + return stringField; + } + + private static KeyValue createCollectedFrom() { + KeyValue cf = new KeyValue(); + cf.setValue(ORCID); + cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); + return cf; + } + + private static KeyValue createHostedBy() { + KeyValue hb = new KeyValue(); + hb.setValue("Unknown Repository"); + hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); + return hb; + } + + private static StructuredProperty mapAuthorId(String orcidId) { + final StructuredProperty sp = new StructuredProperty(); + sp.setValue(orcidId); + final Qualifier q = new Qualifier(); + q.setClassid("ORCID"); + q.setClassname("ORCID"); + sp.setQualifier(q); + return sp; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java index c460f6299..9b9f3c8b2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java @@ -1,107 +1,109 @@ -package eu.dnetlib.doiboost.orcidnodoi.util; -import com.google.gson.JsonArray; -import com.google.gson.JsonObject; 
-import org.apache.commons.lang3.StringUtils; +package eu.dnetlib.doiboost.orcidnodoi.util; import java.text.SimpleDateFormat; import java.util.*; +import org.apache.commons.lang3.StringUtils; + +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; + public class DumpToActionsUtility { - private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); + private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); - public static String getStringValue(final JsonObject root, final String key) { - if (root.has(key) && !root.get(key).isJsonNull()) - return root.get(key).getAsString(); - return null; - } + public static String getStringValue(final JsonObject root, final String key) { + if (root.has(key) && !root.get(key).isJsonNull()) + return root.get(key).getAsString(); + return null; + } - public static List getArrayValues(final JsonObject root, final String key) { - if (root.has(key) && root.get(key).isJsonArray()) { - final JsonArray asJsonArray = root.get(key).getAsJsonArray(); - final List result = new ArrayList<>(); + public static List getArrayValues(final JsonObject root, final String key) { + if (root.has(key) && root.get(key).isJsonArray()) { + final JsonArray asJsonArray = root.get(key).getAsJsonArray(); + final List result = new ArrayList<>(); + asJsonArray.forEach(it -> { + if (StringUtils.isNotBlank(it.getAsString())) { + result.add(it.getAsString()); + } + }); + return result; + } + return new ArrayList<>(); + } - asJsonArray.forEach(it -> { - if (StringUtils.isNotBlank(it.getAsString())) { - result.add(it.getAsString()); - } - }); - return result; - } - return new ArrayList<>(); - } - public static List getArrayObjects(final JsonObject root, final String key) { - if (root.has(key) && root.get(key).isJsonArray()) { - final JsonArray asJsonArray = root.get(key).getAsJsonArray(); - final List result = new ArrayList<>(); - 
asJsonArray.forEach(it -> { - if (it.getAsJsonObject() != null) { - result.add(it.getAsJsonObject()); - } - }); - return result; - } - return new ArrayList<>(); - } + public static List getArrayObjects(final JsonObject root, final String key) { + if (root.has(key) && root.get(key).isJsonArray()) { + final JsonArray asJsonArray = root.get(key).getAsJsonArray(); + final List result = new ArrayList<>(); + asJsonArray.forEach(it -> { + if (it.getAsJsonObject() != null) { + result.add(it.getAsJsonObject()); + } + }); + return result; + } + return new ArrayList<>(); + } - public static boolean isValidDate(final String date) { - return date.matches("\\d{4}-\\d{2}-\\d{2}"); - } + public static boolean isValidDate(final String date) { + return date.matches("\\d{4}-\\d{2}-\\d{2}"); + } - public static String now_ISO8601() { // NOPMD - String result; - synchronized (ISO8601FORMAT) { - result = ISO8601FORMAT.format(new Date()); - } - //convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00 - //- note the added colon for the Timezone - return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2); - } + public static String now_ISO8601() { // NOPMD + String result; + synchronized (ISO8601FORMAT) { + result = ISO8601FORMAT.format(new Date()); + } + // convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00 + // - note the added colon for the Timezone + return result.substring(0, result.length() - 2) + ":" + result.substring(result.length() - 2); + } - public static String getDefaultResulttype(final String cobjcategory) { - switch (cobjcategory) { - case "0029": - return "software"; - case "0021": - case "0024": - case "0025": - case "0030": - return "dataset"; - case "0000": - case "0010": - case "0018": - case "0020": - case "0022": - case "0023": - case "0026": - case "0027": - case "0028": - case "0037": - return "other"; - case "0001": - case "0002": - case "0004": - case "0005": - case "0006": - case "0007": - case "0008": - case 
"0009": - case "0011": - case "0012": - case "0013": - case "0014": - case "0015": - case "0016": - case "0017": - case "0019": - case "0031": - case "0032": - return "publication"; - default: - return "publication"; - } - } + public static String getDefaultResulttype(final String cobjcategory) { + switch (cobjcategory) { + case "0029": + return "software"; + case "0021": + case "0024": + case "0025": + case "0030": + return "dataset"; + case "0000": + case "0010": + case "0018": + case "0020": + case "0022": + case "0023": + case "0026": + case "0027": + case "0028": + case "0037": + return "other"; + case "0001": + case "0002": + case "0004": + case "0005": + case "0006": + case "0007": + case "0008": + case "0009": + case "0011": + case "0012": + case "0013": + case "0014": + case "0015": + case "0016": + case "0017": + case "0019": + case "0031": + case "0032": + return "publication"; + default: + return "publication"; + } + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java index 58c09af60..8883d00f5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/Pair.java @@ -1,30 +1,32 @@ + package eu.dnetlib.doiboost.orcidnodoi.util; public class Pair { - private K k; + private K k; - private V v; + private V v; - public Pair(K k, V v) { - this.k = k; - this.v = v; - } + public Pair(K k, V v) { + this.k = k; + this.v = v; + } - public K getKey() { - return k; - } + public K getKey() { + return k; + } - public V getValue() { - return v; - } + public V getValue() { + return v; + } - @Override - public boolean equals(Object obj) { - if (obj instanceof Pair) { - Pair tmp = (Pair) obj; - return k.equals(tmp.getKey()) && v.equals(tmp.getValue()); - } else return false; - } + @Override + public boolean 
equals(Object obj) { + if (obj instanceof Pair) { + Pair tmp = (Pair) obj; + return k.equals(tmp.getKey()) && v.equals(tmp.getValue()); + } else + return false; + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 75f857ca4..8b50f2d8f 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -54,7 +54,7 @@ public class OrcidClientTest { } // @Test - public void testLambdaFileParser() throws Exception { + private void testLambdaFileParser() throws Exception { try (BufferedReader br = new BufferedReader( new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) { String line; diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java new file mode 100644 index 000000000..4d04e1a16 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -0,0 +1,76 @@ + +package eu.dnetlib.doiboost.orcidnodoi; + +import static org.junit.jupiter.api.Assertions.*; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; + +public class PublicationToOafTest { + + private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); + + @Test +// @Ignore + public void convertOafPublicationTest() throws Exception { + String jsonPublication = IOUtils + .toString( + 
PublicationToOafTest.class.getResourceAsStream("publication.json")); + JsonElement j = new JsonParser().parse(jsonPublication); + logger.info("json publication loaded: " + j.toString()); + Publication oafPublication = (Publication) PublicationToOaf + .generatePublicationActionsFromDump(j.getAsJsonObject()); + assertNotNull(oafPublication.getId()); + assertNotNull(oafPublication.getOriginalId()); + assertEquals(oafPublication.getOriginalId().get(0), "60153327"); + logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals( + oafPublication.getTitle().get(0).getValue(), + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + assertNotNull(oafPublication.getLastupdatetimestamp()); + assertNotNull(oafPublication.getDateofcollection()); + assertNotNull(oafPublication.getDateoftransformation()); + assertTrue(oafPublication.getAuthor().size() == 7); + oafPublication.getAuthor().forEach(a -> { + assertNotNull(a.getFullname()); + assertNotNull(a.getRank()); + logger.info("a.getFullname(): " + a.getFullname()); + if (a.getName() != null) { + logger.info("a.getName(): " + a.getName()); + } + if (a.getSurname() != null) { + logger.info("a.getSurname(): " + a.getSurname()); + } + logger.info("a.getRank(): " + a.getRank()); + if (a.getPid() != null) { + logger.info("a.getPid(): " + a.getPid().get(0).getValue()); + } + + }); + assertNotNull(oafPublication.getCollectedfrom()); + if (oafPublication.getSource() != null) { + logger.info((oafPublication.getSource().get(0).getValue())); + } + if (oafPublication.getExternalReference() != null) { + oafPublication.getExternalReference().forEach(e -> { + assertNotNull(e.getRefidentifier()); + assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + }); + } + assertNotNull(oafPublication.getInstance()); + oafPublication.getInstance().forEach(i -> { + assertNotNull(i.getInstancetype().getClassid()); + 
logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + assertNotNull(i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + }); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 6a5faddbd..d426b01f1 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -95,7 +95,8 @@ public class OrcidNoDoiTest { } @Test - public void authorMatchTest() throws Exception { + @Ignore + private void authorMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json new file mode 100644 index 000000000..579e12f2e --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/publication.json @@ -0,0 +1 @@ +{"oid":"0000-0002-4147-3387","id":"60153327","sourceName":"The Chinese University of Hong Kong","type":"conference-paper","titles":["Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"],"extIds":[{"type":"wosuid","value":"000425015800225","relationShip":"self"},{"type":"other-id","value":"441f521e-ab19-448d-ba32-83157b348ada","relationShip":"self"}],"publicationDates":[],"contributors":[{"sequence":"1","oid":"0000-0002-4147-3387","name":"Elaine","surname":"Chow","creditName":"Elaine 
Chow"},{"sequence":"2","creditName":"Victor Tsui"},{"sequence":"3","creditName":"Achim Müller"},{"sequence":"4","creditName":"Vincy Lee"},{"sequence":"5","creditName":"Lucia Krivánekova"},{"sequence":"6","creditName":"Roland Krivánek"},{"sequence":"7","creditName":"Juliana CN Chan"}]} \ No newline at end of file From ca37d3427bc4bfe05932c9231e11ccdfb98752f2 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 3 Jul 2020 23:30:31 +0200 Subject: [PATCH 007/108] separate workflow to parse orcid summaries, activities and generate dataset with no doi publications; test --- .../orcid/OrcidAuthorsDOIsDataGen.java | 8 +- .../doiboost/orcid/OrcidDSManager.java | 14 +- .../doiboost/orcid/OrcidDownloader.java | 8 +- .../orcidnodoi/ActivitiesDumpReader.java | 6 +- .../orcidnodoi/GenOrcidAuthorWork.java | 3 +- .../SparkGenEnrichedOrcidWorks.java | 18 +- .../orcidnodoi/oaf/PublicationToOaf.java | 9 +- .../doiboost/create_orcid_authors_data.json | 2 +- .../create_orcid_authors_dois_data.json | 2 +- .../dhp/doiboost/download_orcid_data.json | 2 +- .../oozie_app/workflow.xml | 497 +---------------- .../dhp/doiboost/orcid/oozie_app/workflow.xml | 44 +- .../oozie_app/config-default.xml | 31 ++ .../orcid_activities/oozie_app/workflow.xml | 514 ++++++++++++++++++ .../oozie_app/config-default.xml | 22 + .../orcid_summaries/oozie_app/workflow.xml | 68 +++ .../doiboost/orcid/OrcidClientTest.java | 29 +- .../orcidnodoi/PublicationToOafTest.java | 5 +- .../orcidnodoi/xml/OrcidNoDoiTest.java | 4 +- .../xml/activity_work_0000-0002-2536-4498.xml | 72 +++ 20 files changed, 815 insertions(+), 543 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml create mode 100644 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0002-2536-4498.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 70528a8f6..2ec4fe59d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -25,8 +25,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { public void generateAuthorsDOIsData() throws Exception { Configuration conf = initConfigurationObject(); FileSystem fs = initFileSystemObject(conf); - String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz); - Path outputPath = new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsDOIsPath)); + String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); + Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); } @@ -41,8 +41,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { hdfsServerUri = parser.get("hdfsServerUri"); Log.info("HDFS URI: " + hdfsServerUri); - hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath"); - Log.info("Default Path: " + hdfsOrcidDefaultPath); + workingPath = parser.get("workingPath"); + Log.info("Default Path: " + workingPath); activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz"); Log.info("Activities File Name: " + 
activitiesFileNameTarGz); outputAuthorsDOIsPath = parser.get("outputAuthorsDOIsPath"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 4f846bdf3..aa61c0117 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -15,7 +15,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class OrcidDSManager { protected String hdfsServerUri; - protected String hdfsOrcidDefaultPath; + protected String workingPath; private String summariesFileNameTarGz; private String outputAuthorsPath; @@ -28,10 +28,10 @@ public class OrcidDSManager { public void generateAuthors() throws Exception { Configuration conf = initConfigurationObject(); FileSystem fs = initFileSystemObject(conf); - String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(summariesFileNameTarGz); + String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri - .concat(hdfsOrcidDefaultPath) + .concat(workingPath) .concat(outputAuthorsPath) .concat("authors.seq")); SummariesDecompressor.parseGzSummaries(conf, tarGzUri, outputPath); @@ -41,7 +41,7 @@ public class OrcidDSManager { // ====== Init HDFS File System Object Configuration conf = new Configuration(); // Set FileSystem URI - conf.set("fs.defaultFS", hdfsServerUri.concat(hdfsOrcidDefaultPath)); + conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); // Because of Maven conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); @@ -52,7 +52,7 @@ public class OrcidDSManager { // Get the filesystem - HDFS FileSystem fs = null; try { - fs = 
FileSystem.get(URI.create(hdfsServerUri.concat(hdfsOrcidDefaultPath)), conf); + fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -71,8 +71,8 @@ public class OrcidDSManager { hdfsServerUri = parser.get("hdfsServerUri"); Log.info("HDFS URI: " + hdfsServerUri); - hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath"); - Log.info("Default Path: " + hdfsOrcidDefaultPath); + workingPath = parser.get("workingPath"); + Log.info("Working Path: " + workingPath); summariesFileNameTarGz = parser.get("summariesFileNameTarGz"); Log.info("Summaries File Name: " + summariesFileNameTarGz); outputAuthorsPath = parser.get("outputAuthorsPath"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java index 2e1a199da..762d8aecd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java @@ -69,12 +69,12 @@ public class OrcidDownloader extends OrcidDSManager { long startDownload = 0; Configuration conf = initConfigurationObject(); FileSystem fs = initFileSystemObject(conf); - String lambdaFileUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(lambdaFileName); + String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); Path hdfsreadpath = new Path(lambdaFileUri); FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath); Path hdfsoutputPath = new Path( hdfsServerUri - .concat(hdfsOrcidDefaultPath) + .concat(workingPath) .concat(outputPath) .concat("orcid_records.seq")); @@ -176,8 +176,8 @@ public class OrcidDownloader extends OrcidDSManager { hdfsServerUri = parser.get("hdfsServerUri"); Log.info("HDFS URI: " + hdfsServerUri); - hdfsOrcidDefaultPath = 
parser.get("hdfsOrcidDefaultPath"); - Log.info("Default Path: " + hdfsOrcidDefaultPath); + workingPath = parser.get("workingPath"); + Log.info("Default Path: " + workingPath); lambdaFileName = parser.get("lambdaFileName"); Log.info("Lambda File Name: " + lambdaFileName); outputPath = parser.get("outputPath"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index bf63568d8..807f52972 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -26,8 +26,8 @@ import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class ActivitiesDumpReader { - private static final int MAX_XML_WORKS_PARSED = 100; - private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 10; + private static final int MAX_XML_WORKS_PARSED = -1; + private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws Exception { @@ -127,7 +127,7 @@ public class ActivitiesDumpReader { Log .warn( "Parsing work from tar archive and xml work: " + filename + " " + e.getMessage()); - Log.warn(e); +// Log.warn(e); } if ((counter % XML_WORKS_PARSED_COUNTER_LOG_INTERVAL) == 0) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 8dcee796c..041424ba9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -16,7 +16,7 @@ public class GenOrcidAuthorWork extends 
OrcidDSManager { private String activitiesFileNameTarGz; private String outputWorksPath; - private String workingPath; +// private String workingPath; public static void main(String[] args) throws IOException, Exception { GenOrcidAuthorWork genOrcidAuthorWork = new GenOrcidAuthorWork(); @@ -45,7 +45,6 @@ public class GenOrcidAuthorWork extends OrcidDSManager { Log.info("HDFS URI: " + hdfsServerUri); workingPath = parser.get("workingPath"); Log.info("Working Path: " + workingPath); - hdfsOrcidDefaultPath = workingPath; activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz"); Log.info("Activities File Name: " + activitiesFileNameTarGz); outputWorksPath = parser.get("outputWorksPath"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index ae1e4dae6..b0b989463 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -16,6 +16,7 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,9 +25,11 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcid.json.JsonHelper; import eu.dnetlib.doiboost.orcid.model.AuthorData; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; @@ -59,7 +62,7 @@ 
public class SparkGenEnrichedOrcidWorks { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaPairRDD summariesRDD = sc - .sequenceFile(workingPath + "../orcid_summaries/output/authors.seq", Text.class, Text.class); + .sequenceFile(workingPath + "summaries/output/authors.seq", Text.class, Text.class); Dataset summariesDataset = spark .createDataset( summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(), @@ -89,8 +92,19 @@ public class SparkGenEnrichedOrcidWorks { .filter(Objects::nonNull) .toJavaRDD(); logger.info("Works enriched data created: " + enrichedWorksRDD.count()); - enrichedWorksRDD.repartition(10).saveAsTextFile(workingPath + outputEnrichedWorksPath); + enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath); logger.info("Works enriched data saved"); + JavaRDD> oafPublicationRDD = enrichedWorksRDD.map(e -> { + JsonElement j = new JsonParser().parse(e._2()); + return new Tuple2<>(e._1(), (Publication) PublicationToOaf + .generatePublicationActionsFromDump(j.getAsJsonObject())); + }); + + Dataset> publicationDataset = spark + .createDataset( + oafPublicationRDD.repartition(1).rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.bean(Publication.class))); + publicationDataset.write().mode(SaveMode.Overwrite).save(workingPath + "no_doi_dataset/output"); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index dc03767ec..19bfe0f30 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -172,7 +172,7 @@ public class PublicationToOaf { instance.setUrl(urls); } - final String pubDate = getPublicationDate(rootElement, "publication_date"); + final String pubDate = 
getPublicationDate(rootElement, "publicationDates"); if (StringUtils.isNotBlank(pubDate)) { instance.setDateofacceptance(mapStringField(pubDate, null)); } @@ -325,7 +325,12 @@ public class PublicationToOaf { private static String getPublicationDate(final JsonObject rootElement, final String jsonKey) { - final JsonObject pubDateJson = rootElement.getAsJsonObject(jsonKey); + JsonObject pubDateJson = null; + try { + pubDateJson = rootElement.getAsJsonObject(jsonKey); + } catch (Exception e) { + return null; + } if (pubDateJson == null) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json index bf992b508..6f213e415 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json @@ -1,6 +1,6 @@ [ {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, - {"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, {"paramName":"f", "paramLongName":"summariesFileNameTarGz", "paramDescription": "the name of the summaries orcid file", "paramRequired": true}, {"paramName":"o", "paramLongName":"outputAuthorsPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json index 131c30125..b2f0fdeda 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json @@ -1,6 +1,6 @@ [ {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, - {"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, {"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true}, {"paramName":"o", "paramLongName":"outputAuthorsDOIsPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_orcid_data.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_orcid_data.json index 444e487f7..8c69b168b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_orcid_data.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_orcid_data.json @@ -1,6 +1,6 @@ [ {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, - {"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, {"paramName":"f", "paramLongName":"lambdaFileName", "paramDescription": "the name of the lambda file", "paramRequired": true}, {"paramName":"o", "paramLongName":"outputPath", "paramDescription": "the relative folder of the sequencial file to 
write", "paramRequired": true}, {"paramName":"t", "paramLongName":"token", "paramDescription": "token to grant access", "paramRequired": true} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml index df5e0e76f..a60af8b45 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml @@ -1,75 +1,9 @@ - workingPath_activities + workingPath the working dir base path - - shell_cmd_0 - wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz ; rm -f /tmp/ORCID_2019_activites_0.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 0 - - - shell_cmd_1 - wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz ; rm -f /tmp/ORCID_2019_activites_1.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 1 - - - shell_cmd_2 - wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz ; rm -f /tmp/ORCID_2019_activites_2.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 2 - - - shell_cmd_3 - wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz 
/data/orcid_activities/ORCID_2019_activites_3.tar.gz ; rm -f /tmp/ORCID_2019_activites_3.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 3 - - - shell_cmd_4 - wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz ; rm -f /tmp/ORCID_2019_activites_4.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 4 - - - shell_cmd_5 - wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz ; rm -f /tmp/ORCID_2019_activites_5.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 5 - - - shell_cmd_6 - wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz ; rm -f /tmp/ORCID_2019_activites_6.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 6 - - - shell_cmd_7 - wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz ; rm -f /tmp/ORCID_2019_activites_7.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 7 - - - shell_cmd_8 - wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz ; rm -f /tmp/ORCID_2019_activites_8.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 8 - - - shell_cmd_9 - wget -O /tmp/ORCID_2019_activites_9.tar.gz 
https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz ; rm -f /tmp/ORCID_2019_activites_9.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file 9 - - - shell_cmd_X - wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz ; rm -f /tmp/ORCID_2019_activites_X.tar.gz - - the shell command that downloads and puts to hdfs orcid activity file X - @@ -80,436 +14,11 @@ - - + - + - - - - - - - - - - - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_0.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_0} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_0.tar.gz - -owno_doi_works/works_0.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_1.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_1} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_1.tar.gz - -owno_doi_works/works_1.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_2.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_2} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_2.tar.gz - -owno_doi_works/works_2.seq - -oewno_doi_enriched_works/ - - - - - - - - - 
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_3.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_3} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_3.tar.gz - -owno_doi_works/works_3.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_4.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_4} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_4.tar.gz - -owno_doi_works/works_4.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_5.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_5} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_5.tar.gz - -owno_doi_works/works_5.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_6.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_6} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_6.tar.gz - -owno_doi_works/works_6.seq - -oewno_doi_enriched_works/ - - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_7.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_7} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_7.tar.gz - -owno_doi_works/works_7.seq - 
-oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_8.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_8} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_8.tar.gz - -owno_doi_works/works_8.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_9.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_9} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_9.tar.gz - -owno_doi_works/works_9.seq - -oewno_doi_enriched_works/ - - - - - - - - - ${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_X.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_X} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork - -w${workingPath_activities}/ - -n${nameNode} - -fORCID_2019_activites_X.tar.gz - -owno_doi_works/works_X.seq - -oewno_doi_enriched_works/ - - - - - - diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml index 7a8d04187..51e00dc0f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml @@ -1,9 +1,15 @@ - + workingPath the working dir base path + + shell_cmd_0 + wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz 
; rm -f /tmp/ORCID_2019_summaries.tar.gz + + the shell command that downloads and puts to hdfs orcid summaries + @@ -15,24 +21,44 @@ - - + + - + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_0} + + + - - - + ${jobTracker} ${nameNode} eu.dnetlib.doiboost.orcid.OrcidDSManager - -d${workingPath}/ + -w${workingPath}/ -n${nameNode} -fORCID_2019_summaries.tar.gz - -ooutput/ + -osummaries/output/ diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml new file mode 100644 index 000000000..3068562d0 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml @@ -0,0 +1,31 @@ + + + oozie.action.sharelib.for.java + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + oozie.launcher.mapreduce.map.java.opts + -Xmx4g + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml new file mode 100644 index 000000000..8f9a5123e --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml @@ -0,0 +1,514 @@ + + + + workingPath + the working dir base path + + + shell_cmd_0 + wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz ; rm 
-f /tmp/ORCID_2019_activites_0.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 0 + + + shell_cmd_1 + wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz ; rm -f /tmp/ORCID_2019_activites_1.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 1 + + + shell_cmd_2 + wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz ; rm -f /tmp/ORCID_2019_activites_2.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 2 + + + shell_cmd_3 + wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz ; rm -f /tmp/ORCID_2019_activites_3.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 3 + + + shell_cmd_4 + wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz ; rm -f /tmp/ORCID_2019_activites_4.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 4 + + + shell_cmd_5 + wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz ; rm -f /tmp/ORCID_2019_activites_5.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 5 + + + shell_cmd_6 + wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs 
-copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz ; rm -f /tmp/ORCID_2019_activites_6.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 6 + + + shell_cmd_7 + wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz ; rm -f /tmp/ORCID_2019_activites_7.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 7 + + + shell_cmd_8 + wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz ; rm -f /tmp/ORCID_2019_activites_8.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 8 + + + shell_cmd_9 + wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz ; rm -f /tmp/ORCID_2019_activites_9.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file 9 + + + shell_cmd_X + wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz ; rm -f /tmp/ORCID_2019_activites_X.tar.gz + + the shell command that downloads and puts to hdfs orcid activity file X + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_0.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_0} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork 
+ -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_0.tar.gz + -owno_doi_works/works_0.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_1.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_1} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_1.tar.gz + -owno_doi_works/works_1.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_2.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_2} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_2.tar.gz + -owno_doi_works/works_2.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_3.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_3} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_3.tar.gz + -owno_doi_works/works_3.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_4.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_4} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_4.tar.gz + -owno_doi_works/works_4.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_5.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_5} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_5.tar.gz + 
-owno_doi_works/works_5.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_6.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_6} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_6.tar.gz + -owno_doi_works/works_6.seq + -oewno_doi_enriched_works/ + + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_7.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_7} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_7.tar.gz + -owno_doi_works/works_7.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_8.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_8} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_8.tar.gz + -owno_doi_works/works_8.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_9.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_9} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_9.tar.gz + -owno_doi_works/works_9.seq + -oewno_doi_enriched_works/ + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_activites_X.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_X} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_activites_X.tar.gz + -owno_doi_works/works_X.seq + -oewno_doi_enriched_works/ + + + + + + + + 
+ \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml new file mode 100644 index 000000000..e77dd09c9 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml @@ -0,0 +1,22 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml new file mode 100644 index 000000000..3362cc67b --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml @@ -0,0 +1,68 @@ + + + + workingPath + the working dir base path + + + shell_cmd_0 + wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz + + the shell command that downloads and puts to hdfs orcid summaries + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + ${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))} + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd_0} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.OrcidDSManager + -w${workingPath}/ + -n${nameNode} + -fORCID_2019_summaries.tar.gz + -osummaries/output/ + + + + + + + \ No newline at end of file 
diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 8b50f2d8f..5e0f91ecd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -3,9 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; +import java.io.*; +import java.nio.file.Files; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -20,6 +19,7 @@ import org.apache.http.impl.client.HttpClients; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidClientTest { final String orcidId = "0000-0001-7291-3210"; @@ -32,11 +32,20 @@ public class OrcidClientTest { String lastUpdate = "2019-09-30 00:00:00"; String shortDate = "2020-05-06 16:06:11"; -// curl -i -H "Accept: application/vnd.orcid+xml" +// curl -i -H "Accept: application/vnd.orcid+xml" // -H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d' // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' - public String testDownloadRecord(String orcidId) throws Exception { + @Test + public void downloadTest() throws Exception { + String record = testDownloadRecord("0000-0002-2536-4498"); + File f = new File("/tmp/downloaded_0000-0002-2536-4498.xml"); + OutputStream outStream = new FileOutputStream(f); + IOUtils.write(record.getBytes(), outStream); + System.out.println("saved to tmp"); + } + + private String testDownloadRecord(String orcidId) throws Exception { try (CloseableHttpClient client = HttpClients.createDefault()) { HttpGet httpGet = new 
HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); httpGet.addHeader("Accept", "application/vnd.orcid+xml"); @@ -100,7 +109,7 @@ public class OrcidClientTest { } // @Test - public void getRecordDatestamp() throws ParseException { + private void getRecordDatestamp() throws ParseException { Date toRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toRetrieveDate); Date toNotRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toNotRetrieveDate); Date lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate); @@ -108,7 +117,7 @@ public class OrcidClientTest { assertTrue(!toNotRetrieveDateDt.after(lastUpdateDt)); } - public void testDate(String value) throws ParseException { + private void testDate(String value) throws ParseException { System.out.println(value.toString()); if (value.length() != 19) { value = value.substring(0, 19); @@ -118,14 +127,16 @@ public class OrcidClientTest { } // @Test - public void testModifiedDate() throws ParseException { + @Ignore + private void testModifiedDate() throws ParseException { testDate(toRetrieveDate); testDate(toNotRetrieveDate); testDate(shortDate); } // @Test - public void testReadBase64CompressedRecord() throws Exception { + @Ignore + private void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0001-6645-509X.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 4d04e1a16..39f78522f 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -13,14 +13,15 @@ import 
com.google.gson.JsonParser; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; +import jdk.nashorn.internal.ir.annotations.Ignore; public class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test -// @Ignore - public void convertOafPublicationTest() throws Exception { + @Ignore + private void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( PublicationToOafTest.class.getResourceAsStream("publication.json")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index d426b01f1..ca91a242a 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -42,12 +42,12 @@ public class OrcidNoDoiTest { @Test @Ignore - private void readPublicationFieldsTest() + public void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils .toString( - OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); + OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0002-2536-4498.xml")); if (xml == null) { logger.info("Resource not found"); diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0002-2536-4498.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0002-2536-4498.xml new file mode 100644 index 000000000..43d3b2351 --- /dev/null +++ 
b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0002-2536-4498.xml @@ -0,0 +1,72 @@ + + + 2019-10-22T03:18:13.755Z + 2020-06-17T11:07:13.703Z + + + https://orcid.org/client/0000-0001-8607-8906 + 0000-0001-8607-8906 + orcid.org + + INSPIRE-HEP + + + Measurement of the $t\bar{t}$ production cross-section and lepton differential distributions in $e\mu$ dilepton events from $pp$ collisions at $\sqrt{s}=13$ TeV with the ATLAS detector + + + + other-id + 1759875 + 1759875 + http://inspirehep.net/record/1759875 + self + + + doi + 10.1140/epjc/s10052-020-7907-9 + 10.1140/epjc/s10052-020-7907-9 + http://dx.doi.org/10.1140/epjc/s10052-020-7907-9 + self + + + arxiv + 1910.08819 + arXiv:1910.08819 + http://arxiv.org/abs/1910.08819 + self + + + http://inspirehep.net/record/1759875 + journal-article + + 2020 + 06 + 12 + + Eur.Phys.J.C + From c82b15b5f4817348d446fbf53f8337a5e3601085 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 28 Jul 2020 15:23:52 +0200 Subject: [PATCH 008/108] migrate configuration to ocean, fix publication dataset creation --- .../SparkGenEnrichedOrcidWorks.java | 13 ++-- .../oozie_app/config-default.xml | 31 --------- .../oozie_app/workflow.xml | 68 ++++++++++++++++--- .../orcid/xml/XMLRecordParserTest.java | 6 +- 4 files changed, 68 insertions(+), 50 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index b0b989463..b24e71615 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -91,19 +91,18 @@ 
public class SparkGenEnrichedOrcidWorks { Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .filter(Objects::nonNull) .toJavaRDD(); - logger.info("Works enriched data created: " + enrichedWorksRDD.count()); enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath); logger.info("Works enriched data saved"); - JavaRDD> oafPublicationRDD = enrichedWorksRDD.map(e -> { + JavaRDD oafPublicationRDD = enrichedWorksRDD.map(e -> { JsonElement j = new JsonParser().parse(e._2()); - return new Tuple2<>(e._1(), (Publication) PublicationToOaf - .generatePublicationActionsFromDump(j.getAsJsonObject())); - }); + return (Publication) PublicationToOaf + .generatePublicationActionsFromDump(j.getAsJsonObject()); + }).filter(p -> p != null); - Dataset> publicationDataset = spark + Dataset publicationDataset = spark .createDataset( oafPublicationRDD.repartition(1).rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.bean(Publication.class))); + Encoders.bean(Publication.class)); publicationDataset.write().mode(SaveMode.Overwrite).save(workingPath + "no_doi_dataset/output"); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml deleted file mode 100644 index 3068562d0..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/config-default.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - oozie.action.sharelib.for.java - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - oozie.launcher.mapreduce.map.java.opts - -Xmx4g - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff --git 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml index a60af8b45..faed3104a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml @@ -1,11 +1,56 @@ - + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + workingPath the working dir base path + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + @@ -16,20 +61,25 @@ - + - + - ${jobTracker} - ${nameNode} - yarn + yarn-cluster cluster - Gen_Enriched_Orcid_Works + GenOrcidNoDoiDataset eu.dnetlib.doiboost.orcidnodoi.SparkGenEnrichedOrcidWorks - dhp-doiboost-1.2.4-SNAPSHOT.jar - --num-executors 10 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf 
spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} -w${workingPath}/ -n${nameNode} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 4d8237f77..5bf6f27b9 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -12,7 +12,7 @@ import eu.dnetlib.doiboost.orcid.model.WorkData; public class XMLRecordParserTest { @Test - public void testOrcidAuthorDataXMLParser() throws Exception { + private void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); @@ -27,7 +27,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidXMLErrorRecordParser() throws Exception { + private void testOrcidXMLErrorRecordParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); @@ -40,7 +40,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidWorkDataXMLParser() throws Exception { + private void testOrcidWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( From 196f36c6edd10203ff304e7cd122b3b679593618 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 30 Jul 2020 13:38:33 +0200 Subject: [PATCH 009/108] fix publication dataset creation --- .../SparkGenEnrichedOrcidWorks.java | 47 +++++-- .../orcidnodoi/oaf/PublicationToOaf.java | 117 +++++++++++++----- .../orcidnodoi/PublicationToOafTest.java | 3 +- 3 files changed, 128 insertions(+), 39 deletions(-) diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index b24e71615..cae5a168f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -17,10 +17,12 @@ import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; +import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.gson.Gson; +import com.google.gson.GsonBuilder; import com.google.gson.JsonElement; import com.google.gson.JsonParser; @@ -93,17 +95,48 @@ public class SparkGenEnrichedOrcidWorks { .toJavaRDD(); enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath); logger.info("Works enriched data saved"); - JavaRDD oafPublicationRDD = enrichedWorksRDD.map(e -> { - JsonElement j = new JsonParser().parse(e._2()); - return (Publication) PublicationToOaf - .generatePublicationActionsFromDump(j.getAsJsonObject()); - }).filter(p -> p != null); + + final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications"); + final LongAccumulator enrichedPublications = spark + .sparkContext() + .longAccumulator("enrichedPublications"); + final LongAccumulator errorsGeneric = spark.sparkContext().longAccumulator("errorsGeneric"); + final LongAccumulator errorsInvalidTitle = spark.sparkContext().longAccumulator("errorsInvalidTitle"); + final LongAccumulator errorsNotFoundAuthors = spark + .sparkContext() + .longAccumulator("errorsNotFoundAuthors"); + final LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType"); + final PublicationToOaf 
publicationToOaf = new PublicationToOaf( + parsedPublications, + enrichedPublications, + errorsGeneric, + errorsInvalidTitle, + errorsNotFoundAuthors, + errorsInvalidType); + JavaRDD oafPublicationRDD = enrichedWorksRDD + .map( + e -> { + return (Publication) publicationToOaf + .generatePublicationActionsFromJson(e._2()); + }) + .filter(p -> p != null); Dataset publicationDataset = spark .createDataset( - oafPublicationRDD.repartition(1).rdd(), + oafPublicationRDD.rdd(), Encoders.bean(Publication.class)); - publicationDataset.write().mode(SaveMode.Overwrite).save(workingPath + "no_doi_dataset/output"); + publicationDataset + .write() + .format("parquet") + .mode(SaveMode.Overwrite) + .save(workingPath + "no_doi_dataset/output"); + + logger.info("parsedPublications: " + parsedPublications.value().toString()); + logger.info("enrichedPublications: " + enrichedPublications.value().toString()); + logger.info("errorsGeneric: " + errorsGeneric.value().toString()); + logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); + logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); + logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 19bfe0f30..448fa9a74 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -3,18 +3,17 @@ package eu.dnetlib.doiboost.orcidnodoi.oaf; import static eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility.*; +import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import 
org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; +import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.oaf.*; @@ -22,7 +21,7 @@ import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; import eu.dnetlib.doiboost.orcidnodoi.util.Pair; -public class PublicationToOaf { +public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); @@ -31,6 +30,37 @@ public class PublicationToOaf { public static final String OPENAIRE_PREFIX = "openaire____"; public static final String SEPARATOR = "::"; + private final LongAccumulator parsedPublications; + private final LongAccumulator enrichedPublications; + private final LongAccumulator errorsGeneric; + private final LongAccumulator errorsInvalidTitle; + private final LongAccumulator errorsNotFoundAuthors; + private final LongAccumulator errorsInvalidType; + + public PublicationToOaf( + LongAccumulator parsedPublications, + LongAccumulator enrichedPublications, + LongAccumulator errorsGeneric, + LongAccumulator errorsInvalidTitle, + LongAccumulator errorsNotFoundAuthors, + LongAccumulator errorsInvalidType) { + this.parsedPublications = parsedPublications; + this.enrichedPublications = enrichedPublications; + this.errorsGeneric = errorsGeneric; + this.errorsInvalidTitle = errorsInvalidTitle; + this.errorsNotFoundAuthors = errorsNotFoundAuthors; + this.errorsInvalidType = errorsInvalidType; + } + + public PublicationToOaf() { + this.parsedPublications = null; + this.enrichedPublications = null; + this.errorsGeneric = null; + this.errorsInvalidTitle = null; + this.errorsNotFoundAuthors = null; + this.errorsInvalidType = null; + } + private static Map> datasources = new HashMap>() { { @@ -69,11 +99,27 @@ 
public class PublicationToOaf { public static final String PID_TYPES = "dnet:pid_types"; - public static Oaf generatePublicationActionsFromDump(final JsonObject rootElement) { + public Oaf generatePublicationActionsFromJson(final String json) { + try { + if (parsedPublications != null) { + parsedPublications.add(1); + } + JsonElement jElement = new JsonParser().parse(json); + JsonObject jObject = jElement.getAsJsonObject(); + return generatePublicationActionsFromDump(jObject); + } catch (Throwable t) { + logger.error("creating publication: " + t.getMessage()); + if (errorsGeneric != null) { + errorsGeneric.add(1); + } + return null; + } + } + + public Oaf generatePublicationActionsFromDump(final JsonObject rootElement) { logger.debug("generatePublicationActionsFromDump ..."); - if (!isValid(rootElement/* , context */)) { - logger.error("publication not valid"); + if (!isValid(rootElement)) { return null; } @@ -122,8 +168,9 @@ public class PublicationToOaf { // Adding titles final List titles = createRepeatedField(rootElement, "titles"); if (titles == null || titles.isEmpty()) { - logger.error("titles not found"); -// context.incrementCounter("filtered", "title_not_found", 1); + if (errorsInvalidTitle != null) { + errorsInvalidTitle.add(1); + } return null; } Qualifier q = mapQualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); @@ -189,8 +236,9 @@ public class PublicationToOaf { publication.setInstance(Arrays.asList(instance)); } else { - logger.error("type not found"); -// context.incrementCounter("filtered", "type_not_found", 1); + if (errorsInvalidType != null) { + errorsInvalidType.add(1); + } return null; } @@ -199,17 +247,21 @@ public class PublicationToOaf { if (authors != null && authors.size() > 0) { publication.setAuthor(authors); } else { - logger.error("authors not found"); -// context.incrementCounter("filtered", "author_not_found", 1); + if (errorsNotFoundAuthors != null) { + errorsNotFoundAuthors.add(1); + } return 
null; } String classValue = getDefaultResulttype(cobjValue); publication .setResulttype(mapQualifier(classValue, classValue, "dnet:result_typologies", "dnet:result_typologies")); + if (enrichedPublications != null) { + enrichedPublications.add(1); + } return publication; } - public static List createAuthors(final JsonObject root) { + public List createAuthors(final JsonObject root) { final String authorsJSONFieldName = "contributors"; @@ -273,7 +325,7 @@ public class PublicationToOaf { return null; } - private static List createRepeatedField(final JsonObject rootElement, final String fieldName) { + private List createRepeatedField(final JsonObject rootElement, final String fieldName) { if (!rootElement.has(fieldName)) { return null; } @@ -291,14 +343,14 @@ public class PublicationToOaf { } } - private static String cleanField(String value) { + private String cleanField(String value) { if (value != null && !value.isEmpty() && value.charAt(0) == '"' && value.charAt(value.length() - 1) == '"') { value = value.substring(1, value.length() - 1); } return value; } - private static void settingRelevantDate(final JsonObject rootElement, + private void settingRelevantDate(final JsonObject rootElement, final Publication publication, final String jsonKey, final String dictionaryKey, @@ -322,7 +374,7 @@ public class PublicationToOaf { } } - private static String getPublicationDate(final JsonObject rootElement, + private String getPublicationDate(final JsonObject rootElement, final String jsonKey) { JsonObject pubDateJson = null; @@ -358,24 +410,27 @@ public class PublicationToOaf { return null; } - protected static boolean isValid(final JsonObject rootElement/* , final Reporter context */) { + protected boolean isValid(final JsonObject rootElement/* , final Reporter context */) { final String type = getStringValue(rootElement, "type"); if (!typologiesMapping.containsKey(type)) { logger.error("unknowntype_" + type); -// context.incrementCounter("filtered", "unknowntype_" + type, 
1); + if (errorsInvalidType != null) { + errorsInvalidType.add(1); + } return false; } if (!isValidJsonArray(rootElement, "titles")) { - logger.error("invalid_title"); -// context.incrementCounter("filtered", "invalid_title", 1); + if (errorsInvalidTitle != null) { + errorsInvalidTitle.add(1); + } return false; } return true; } - private static boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { + private boolean isValidJsonArray(final JsonObject rootElement, final String fieldName) { if (!rootElement.has(fieldName)) { return false; } @@ -395,7 +450,7 @@ public class PublicationToOaf { return true; } - private static Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { + private Qualifier mapQualifier(String classId, String className, String schemeId, String schemeName) { final Qualifier qualifier = new Qualifier(); qualifier.setClassid(classId); qualifier.setClassname(className); @@ -404,7 +459,7 @@ public class PublicationToOaf { return qualifier; } - private static ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, + private ExternalReference convertExtRef(String extId, String classId, String className, String schemeId, String schemeName) { ExternalReference ex = new ExternalReference(); ex.setRefidentifier(extId); @@ -412,7 +467,7 @@ public class PublicationToOaf { return ex; } - private static StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { + private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { if (value == null | StringUtils.isBlank(value)) { return null; } @@ -424,7 +479,7 @@ public class PublicationToOaf { return structuredProperty; } - private static Field mapStringField(String value, DataInfo dataInfo) { + private Field mapStringField(String value, DataInfo dataInfo) { if (value == null || StringUtils.isBlank(value)) { return null; } @@ 
-435,21 +490,21 @@ public class PublicationToOaf { return stringField; } - private static KeyValue createCollectedFrom() { + private KeyValue createCollectedFrom() { KeyValue cf = new KeyValue(); cf.setValue(ORCID); cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "806360c771262b4d6770e7cdf04b5c5a"); return cf; } - private static KeyValue createHostedBy() { + private KeyValue createHostedBy() { KeyValue hb = new KeyValue(); hb.setValue("Unknown Repository"); hb.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c"); return hb; } - private static StructuredProperty mapAuthorId(String orcidId) { + private StructuredProperty mapAuthorId(String orcidId) { final StructuredProperty sp = new StructuredProperty(); sp.setValue(orcidId); final Qualifier q = new Qualifier(); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 39f78522f..01e26dcb4 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -27,7 +27,8 @@ public class PublicationToOafTest { PublicationToOafTest.class.getResourceAsStream("publication.json")); JsonElement j = new JsonParser().parse(jsonPublication); logger.info("json publication loaded: " + j.toString()); - Publication oafPublication = (Publication) PublicationToOaf + PublicationToOaf publicationToOaf = new PublicationToOaf(); + Publication oafPublication = (Publication) publicationToOaf .generatePublicationActionsFromDump(j.getAsJsonObject()); assertNotNull(oafPublication.getId()); assertNotNull(oafPublication.getOriginalId()); From 0377b40fbad56c0dd75fb7c8287488a4f63ceffe Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 30 Jul 2020 18:38:07 +0200 Subject: [PATCH 010/108] output to one parquet 
file --- .../doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 2 +- .../doiboost/orcidnodoi/oaf/PublicationToOaf.java | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index cae5a168f..dea597cbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -123,7 +123,7 @@ public class SparkGenEnrichedOrcidWorks { Dataset publicationDataset = spark .createDataset( - oafPublicationRDD.rdd(), + oafPublicationRDD.repartition(1).rdd(), Encoders.bean(Publication.class)); publicationDataset .write() diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 448fa9a74..503df67ff 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory; import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -217,6 +218,8 @@ public class PublicationToOaf implements Serializable { final List urls = createRepeatedField(rootElement, "urls"); if (urls != null && !urls.isEmpty()) { instance.setUrl(urls); + } else { + dataInfo.setInvisible(true); } final String pubDate = getPublicationDate(rootElement, "publicationDates"); @@ -508,8 
+511,10 @@ public class PublicationToOaf implements Serializable { final StructuredProperty sp = new StructuredProperty(); sp.setValue(orcidId); final Qualifier q = new Qualifier(); - q.setClassid("ORCID"); - q.setClassname("ORCID"); + q.setClassid(ORCID.toLowerCase()); + q.setClassname(ORCID.toLowerCase()); + q.setSchemeid(ModelConstants.DNET_PID_TYPES); + q.setSchemename(ModelConstants.DNET_PID_TYPES); sp.setQualifier(q); return sp; } From 538f299767d433ba17681ab82f4b7a32bfb24a2c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 14 Sep 2020 12:35:16 +0200 Subject: [PATCH 011/108] merged --- .../dhp/broker/oa/IndexNotificationsJob.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index cb7acb46d..792a2354a 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -47,8 +47,9 @@ public class IndexNotificationsJob { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(IndexNotificationsJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); + .toString( + IndexNotificationsJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/index_notifications.json"))); parser.parseArgument(args); final SparkConf conf = new SparkConf(); @@ -116,7 +117,8 @@ public class IndexNotificationsJob { final long date) { final List list = subscriptions .stream() - .filter(s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) + .filter( + s -> StringUtils.isBlank(s.getTopic()) || s.getTopic().equals("*") || s.getTopic().equals(e.getTopic())) .filter(s -> 
verifyConditions(e.getMap(), s.conditionsAsMap())) .map(s -> generateNotification(s, e, date)) .collect(Collectors.toList()); @@ -147,15 +149,18 @@ public class IndexNotificationsJob { if (conditions.containsKey("trust") && !SubscriptionUtils - .verifyFloatRange(map.getTrust(), conditions.get("trust").get(0).getValue(), conditions.get("trust").get(0).getOtherValue())) { + .verifyFloatRange( + map.getTrust(), conditions.get("trust").get(0).getValue(), + conditions.get("trust").get(0).getOtherValue())) { return false; } if (conditions.containsKey("targetDateofacceptance") && !conditions .get("targetDateofacceptance") .stream() - .anyMatch(c -> SubscriptionUtils - .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { + .anyMatch( + c -> SubscriptionUtils + .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } From 9e8e7fe6ef24dbf6a004190cf86cbc623c8b8d21 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 15 Sep 2020 11:32:49 +0200 Subject: [PATCH 012/108] add comments --- .../java/eu/dnetlib/doiboost/orcid/model/AuthorData.java | 4 ++++ .../dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java | 6 +++++- .../dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java | 6 +++++- .../doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 5 ++++- .../eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java | 4 ++++ .../eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java | 4 ++++ .../eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java | 4 ++++ .../doiboost/orcidnodoi/model/PublicationDate.java | 4 ++++ .../dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java | 4 ++++ .../dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java | 5 ++++- .../doiboost/orcidnodoi/similarity/AuthorMatcher.java | 8 ++++++++ .../doiboost/orcidnodoi/util/DumpToActionsUtility.java | 4 ++++ .../doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java | 4 ++++ 13 files changed, 58 insertions(+), 4 deletions(-) diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java index 87f1f65c8..e0624509b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java @@ -3,6 +3,10 @@ package eu.dnetlib.doiboost.orcid.model; import java.io.Serializable; +/** + * This class models the data that are retrieved from orcid publication + */ + public class AuthorData implements Serializable { private String oid; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 807f52972..d852a7023 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -20,10 +20,14 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; import eu.dnetlib.doiboost.orcid.json.JsonHelper; -import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +/** + * This class write on hdfs one sequence file, the key is an orcid identifier and the + * value is an orcid publication in json format + */ + public class ActivitiesDumpReader { private static final int MAX_XML_WORKS_PARSED = -1; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 041424ba9..d32e6d945 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -12,11 +12,15 @@ import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.doiboost.orcid.OrcidDSManager; +/** + * This job generates one sequence file, the key is an orcid identifier and the + * value is an orcid publication in json format + */ + public class GenOrcidAuthorWork extends OrcidDSManager { private String activitiesFileNameTarGz; private String outputWorksPath; -// private String workingPath; public static void main(String[] args) throws IOException, Exception { GenOrcidAuthorWork genOrcidAuthorWork = new GenOrcidAuthorWork(); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index dea597cbb..b984ee2b2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -22,7 +22,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.gson.Gson; -import com.google.gson.GsonBuilder; import com.google.gson.JsonElement; import com.google.gson.JsonParser; @@ -35,6 +34,10 @@ import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; +/** + * This spark job generates one parquet file, containing orcid publications dataset + */ + public class SparkGenEnrichedOrcidWorks { static Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 7f7e3a10a..363cb13e6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -6,6 +6,10 @@ import com.google.gson.JsonObject; import eu.dnetlib.doiboost.orcid.model.AuthorData; import eu.dnetlib.doiboost.orcid.model.WorkData; +/** + * This class converts an object to json and viceversa + */ + public class JsonWriter { public static String create(AuthorData authorData) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java index 8a170de09..9a8651c85 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java @@ -5,6 +5,10 @@ import java.io.Serializable; import eu.dnetlib.doiboost.orcid.model.AuthorData; +/** + * This class models the data related to a contributor, that are retrieved from an orcid publication + */ + public class Contributor extends AuthorData implements Serializable { private String sequence; private String role; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java index 865e54ae3..7fe50ce25 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/ExternalId.java @@ -1,6 +1,10 @@ package eu.dnetlib.doiboost.orcidnodoi.model; +/** + * This class models the data related to external id, that are retrieved from an orcid publication 
+ */ + public class ExternalId { private String type; private String value; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java index 9282a80ba..5f794d8eb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/PublicationDate.java @@ -1,6 +1,10 @@ package eu.dnetlib.doiboost.orcidnodoi.model; +/** + * This class models the data related to a publication date, that are retrieved from an orcid publication + */ + public class PublicationDate { private String year; private String month; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java index 5756521e7..58f992d12 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/WorkDataNoDoi.java @@ -4,6 +4,10 @@ package eu.dnetlib.doiboost.orcidnodoi.model; import java.io.Serializable; import java.util.List; +/** + * This class models the data that are retrieved from orcid publication + */ + public class WorkDataNoDoi implements Serializable { private String oid; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 503df67ff..4d1408470 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -22,6 +22,10 @@ import 
eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; import eu.dnetlib.doiboost.orcidnodoi.util.Pair; +/** + * This class converts an orcid publication from json format to oaf + */ + public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); @@ -119,7 +123,6 @@ public class PublicationToOaf implements Serializable { public Oaf generatePublicationActionsFromDump(final JsonObject rootElement) { - logger.debug("generatePublicationActionsFromDump ..."); if (!isValid(rootElement)) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index 1e4c38bef..88c84ee89 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -21,6 +21,14 @@ import eu.dnetlib.doiboost.orcid.model.AuthorData; import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +/** + * This class is used for searching from a list of publication contributors a + * specific author making a similarity check on both name and surname of the + * author with the credit name of each contributor of the list; as soon as + * the match is found (if exist) author informations are used to enrich the + * matched contribuotr inside contributors list + */ + public class AuthorMatcher { private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java index 9b9f3c8b2..ea4e58c44 
100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java @@ -9,6 +9,10 @@ import org.apache.commons.lang3.StringUtils; import com.google.gson.JsonArray; import com.google.gson.JsonObject; +/** + * Utility class + */ + public class DumpToActionsUtility { private static final SimpleDateFormat ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index ae96a322f..c5c115551 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -17,6 +17,10 @@ import eu.dnetlib.doiboost.orcidnodoi.model.ExternalId; import eu.dnetlib.doiboost.orcidnodoi.model.PublicationDate; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +/** + * This class is used for parsing xml data with vtd parser + */ + public class XMLRecordParserNoDoi { private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); From fefbcfb10682728a499ad6181e83519f66b708a7 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 22 Sep 2020 10:20:25 +0200 Subject: [PATCH 013/108] dependency version moved to main pom (PR review) --- dhp-workflows/dhp-doiboost/pom.xml | 2 +- pom.xml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index e9768be7e..357a57367 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -87,7 +87,7 @@ org.apache.commons commons-text - 1.8 + ${common.text.version} diff 
--git a/pom.xml b/pom.xml index e88e1d51b..9897c8abe 100644 --- a/pom.xml +++ b/pom.xml @@ -669,5 +669,6 @@ 1.1 3.5.3 4.13.0 + 1.8 From a97ad20c7bd7725ee513694d9b00aae0a19b19d2 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 22 Sep 2020 10:46:34 +0200 Subject: [PATCH 014/108] exception is now propagated (PR review) --- .../java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index aa61c0117..8ebeab2e5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -48,15 +48,10 @@ public class OrcidDSManager { return conf; } - protected FileSystem initFileSystemObject(Configuration conf) { + protected FileSystem initFileSystemObject(Configuration conf) throws IOException { // Get the filesystem - HDFS FileSystem fs = null; - try { - fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); return fs; } From ab083f9946a219396b0099f29d67d7c492eec126 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 14:02:32 +0200 Subject: [PATCH 015/108] propagate exception on parsing work (PR request) --- .../dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index d852a7023..c73e1efd1 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -128,10 +128,7 @@ public class ActivitiesDumpReader { } } } catch (Exception e) { - Log - .warn( - "Parsing work from tar archive and xml work: " + filename + " " + e.getMessage()); -// Log.warn(e); + throw new Exception(filename, e); } if ((counter % XML_WORKS_PARSED_COUNTER_LOG_INTERVAL) == 0) { @@ -143,7 +140,7 @@ public class ActivitiesDumpReader { } } } - } catch (IOException e) { + } catch (Exception e) { Log.warn("Parsing work from gzip archive: " + e.getMessage()); Log.warn(e); throw new RuntimeException(e); From c295c71ca0c77f5b3aed5817a872d9c9da77aade Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 14:07:26 +0200 Subject: [PATCH 016/108] added comment --- .../src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 8ebeab2e5..b62ad370e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -50,6 +50,7 @@ public class OrcidDSManager { protected FileSystem initFileSystemObject(Configuration conf) throws IOException { // Get the filesystem - HDFS + // if there is an exception, it will be propagate FileSystem fs = null; fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); return fs; From c3114ba0aeaada891ff13de4c3e4f6469b40ba99 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 14:21:31 +0200 Subject: [PATCH 017/108] replaced null as return value with a more safe empty string --- 
.../dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index b984ee2b2..24f0f7a87 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -167,6 +167,6 @@ public class SparkGenEnrichedOrcidWorks { return name.getAsString(); } } - return null; + return new String(""); } } From 846ba3087310024cf3e28fb2c88f10259323f5f6 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 14:36:18 +0200 Subject: [PATCH 018/108] if typologies mapping fails, an exception will be propagated --- .../eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 4d1408470..deb83723b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -97,8 +97,8 @@ public class PublicationToOaf implements Serializable { .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/orcidnodoi/mappings/typologies.json")); typologiesMapping = new Gson().fromJson(tt, Map.class); - } catch (final Exception e) { - logger.error("loading typologies", e); + } catch (Exception e) { + throw new RuntimeException("loading typologies", e); } } From c58db1c8eab65e0e4ad7d27b7bbc5f815961f050 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: 
Thu, 22 Oct 2020 15:11:02 +0200 Subject: [PATCH 019/108] added filter on null value after map function --- .../eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index deb83723b..63979d1af 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -185,6 +185,7 @@ public class PublicationToOaf implements Serializable { .map(t -> { return mapStructuredProperty(t, q, null); }) + .filter(s -> s!=null) .collect(Collectors.toList())); // Adding identifier final String id = getStringValue(rootElement, "id"); @@ -376,7 +377,7 @@ public class PublicationToOaf implements Serializable { .map(r -> { return mapStructuredProperty(r, q, null); }) - .collect(Collectors.toList())); + .filter(s -> s!=null).collect(Collectors.toList())); } } From 1139d6568d392b61e97c12ce1ceac9b2a59b42e2 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 15:32:26 +0200 Subject: [PATCH 020/108] replaced null value with a more safe empty string as return value --- .../dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java | 8 +++++++- .../doiboost/orcidnodoi/util/DumpToActionsUtility.java | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 63979d1af..136356161 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -166,7 +166,13 @@ public class PublicationToOaf implements Serializable { // Adding source final String source = getStringValue(rootElement, "sourceName"); if (StringUtils.isNotBlank(source)) { - publication.setSource(Arrays.asList(mapStringField(source, null))); + Field sourceField = mapStringField(source, null); + if (sourceField==null) { + publication.setSource(null); + } + else { + publication.setSource(Arrays.asList(sourceField)); + } } // Adding titles diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java index ea4e58c44..8096c4e8e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/util/DumpToActionsUtility.java @@ -20,7 +20,7 @@ public class DumpToActionsUtility { public static String getStringValue(final JsonObject root, final String key) { if (root.has(key) && !root.get(key).isJsonNull()) return root.get(key).getAsString(); - return null; + return new String(""); } public static List getArrayValues(final JsonObject root, final String key) { From a38ab57062955b425c1fec90875c16a6954fb83d Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 15:43:50 +0200 Subject: [PATCH 021/108] let run test methods --- .../eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index ca91a242a..bf5aba99b 100644 --- 
a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -41,7 +41,7 @@ public class OrcidNoDoiTest { String orcidIdA = "0000-0003-2760-1191"; @Test - @Ignore +// @Ignore public void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); @@ -95,7 +95,7 @@ public class OrcidNoDoiTest { } @Test - @Ignore +// @Ignore private void authorMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; From b0290dbcb7728da0b447d38953702ab681bb1ce0 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 16:20:46 +0200 Subject: [PATCH 022/108] moved all dependencies version to main pom.xml --- dhp-workflows/dhp-doiboost/pom.xml | 4 ++-- pom.xml | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 357a57367..b81299cd1 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -14,7 +14,7 @@ net.alchim31.maven scala-maven-plugin - 4.0.1 + ${net.alchim31.maven.version} scala-compile-first @@ -51,7 +51,7 @@ org.apache.httpcomponents httpclient - 4.3.4 + ${org.apache.httpcomponents.version} eu.dnetlib.dhp diff --git a/pom.xml b/pom.xml index 9897c8abe..bae53fcc0 100644 --- a/pom.xml +++ b/pom.xml @@ -670,5 +670,7 @@ 3.5.3 4.13.0 1.8 + 4.3.4 + 4.0.1 From 210a50e4f486c195b627d462e64d8ee10c3dc70e Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 22 Oct 2020 16:24:42 +0200 Subject: [PATCH 023/108] replaced null value --- .../doiboost/orcidnodoi/oaf/PublicationToOaf.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 136356161..ece59c3f1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -167,10 +167,9 @@ public class PublicationToOaf implements Serializable { final String source = getStringValue(rootElement, "sourceName"); if (StringUtils.isNotBlank(source)) { Field sourceField = mapStringField(source, null); - if (sourceField==null) { + if (sourceField == null) { publication.setSource(null); - } - else { + } else { publication.setSource(Arrays.asList(sourceField)); } } @@ -191,7 +190,7 @@ public class PublicationToOaf implements Serializable { .map(t -> { return mapStructuredProperty(t, q, null); }) - .filter(s -> s!=null) + .filter(s -> s != null) .collect(Collectors.toList())); // Adding identifier final String id = getStringValue(rootElement, "id"); @@ -383,7 +382,8 @@ public class PublicationToOaf implements Serializable { .map(r -> { return mapStructuredProperty(r, q, null); }) - .filter(s -> s!=null).collect(Collectors.toList())); + .filter(s -> s != null) + .collect(Collectors.toList())); } } From 6bc7dbeca76e94f6cb00725aa50753d61d122952 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 6 Nov 2020 13:47:50 +0100 Subject: [PATCH 024/108] first version of dataset successful generated from orcid dump 2020 --- dhp-workflows/dhp-doiboost/pom.xml | 2 - .../doiboost/orcid/OrcidDSManager.java | 2 +- .../orcidnodoi/ActivitiesDumpReader.java | 4 +- .../orcidnodoi/GenOrcidAuthorWork.java | 2 +- .../SparkGenEnrichedOrcidWorks.java | 8 +- .../orcidnodoi/similarity/AuthorMatcher.java | 4 +- .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 53 +++--- ... 
=> gen_orcid_authors_from_summaries.json} | 0 ...en_orcid_works-no-doi_from_activities.json | 7 + .../orcid/oozie_app/config-default.xml | 42 ----- .../dhp/doiboost/orcid/oozie_app/workflow.xml | 67 -------- .../oozie_app/config-default.xml | 2 +- .../orcid_activities/oozie_app/workflow.xml | 156 +++++++++++------- .../oozie_app/config-default.xml | 4 + .../orcid_summaries/oozie_app/workflow.xml | 14 +- .../oozie_app/workflow.xml | 4 +- .../doiboost/orcid/OrcidClientTest.java | 4 +- .../orcidnodoi/xml/OrcidNoDoiTest.java | 60 ++++++- ..._work_0000-0003-2760-1191_contributors.xml | 101 ++++++++++++ pom.xml | 12 ++ 20 files changed, 320 insertions(+), 228 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{create_orcid_authors_data.json => gen_orcid_authors_from_summaries.json} (100%) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{gen_enriched_orcid_works => orcidnodoi}/oozie_app/workflow.xml (95%) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191_contributors.xml diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index b81299cd1..624dd7b31 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -51,7 +51,6 @@ org.apache.httpcomponents httpclient - ${org.apache.httpcomponents.version} eu.dnetlib.dhp @@ -87,7 +86,6 @@ org.apache.commons commons-text - ${common.text.version} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index b62ad370e..bf13db021 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -62,7 +62,7 @@ public class OrcidDSManager { .toString( OrcidDSManager.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json"))); + "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); parser.parseArgument(args); hdfsServerUri = parser.get("hdfsServerUri"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index c73e1efd1..c2cfafd87 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -73,7 +73,7 @@ public class ActivitiesDumpReader { SequenceFile.Writer.valueClass(Text.class))) { while ((entry = tais.getNextTarEntry()) != null) { String filename = entry.getName(); - + StringBuffer buffer = new StringBuffer(); try { if (entry.isDirectory() || !filename.contains("works")) { @@ -83,7 +83,7 @@ public class ActivitiesDumpReader { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - StringBuffer buffer = new StringBuffer(); + buffer = new StringBuffer(); while ((line = br.readLine()) != null) { buffer.append(line); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index d32e6d945..d3e9aeaef 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -42,7 +42,7 @@ public class GenOrcidAuthorWork extends OrcidDSManager { .toString( GenOrcidAuthorWork.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works_parameters.json"))); + "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); parser.parseArgument(args); hdfsServerUri = parser.get("hdfsServerUri"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 24f0f7a87..691ca3eee 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -67,7 +67,7 @@ public class SparkGenEnrichedOrcidWorks { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaPairRDD summariesRDD = sc - .sequenceFile(workingPath + "summaries/output/authors.seq", Text.class, Text.class); + .sequenceFile(workingPath + "authors/authors.seq", Text.class, Text.class); Dataset summariesDataset = spark .createDataset( summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(), @@ -96,8 +96,8 @@ public class SparkGenEnrichedOrcidWorks { Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .filter(Objects::nonNull) .toJavaRDD(); - enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath); - logger.info("Works enriched data saved"); +// enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath); + logger.info("Enriched works RDD ready."); final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications"); final 
LongAccumulator enrichedPublications = spark @@ -132,7 +132,7 @@ public class SparkGenEnrichedOrcidWorks { .write() .format("parquet") .mode(SaveMode.Overwrite) - .save(workingPath + "no_doi_dataset/output"); + .save(workingPath + outputEnrichedWorksPath); logger.info("parsedPublications: " + parsedPublications.value().toString()); logger.info("enrichedPublications: " + enrichedPublications.value().toString()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index 88c84ee89..6a1468f4c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.text.Normalizer; import java.util.*; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaroWinklerSimilarity; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,7 +41,7 @@ public class AuthorMatcher { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); Contributor contributor = null; - contributors.forEach(c -> { + contributors.stream().filter(c -> !StringUtils.isBlank(c.getCreditName())).forEach(c -> { if (simpleMatch(c.getCreditName(), author.getName()) || simpleMatch(c.getCreditName(), author.getSurname()) || simpleMatch(c.getCreditName(), author.getOtherName())) { @@ -54,6 +55,7 @@ public class AuthorMatcher { Optional optCon = contributors .stream() .filter(c -> c.isSimpleMatch()) + .filter(c -> !StringUtils.isBlank(c.getCreditName())) .map(c -> { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index c5c115551..f4b093402 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -183,39 +183,34 @@ public class XMLRecordParserNoDoi { private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { List contributors = new ArrayList(); - int nameIndex = 0; - ap.selectXPath("//work:contributor/work:credit-name"); + ap.selectXPath("//work:contributors/work:contributor"); while (ap.evalXPath() != -1) { Contributor contributor = new Contributor(); - int t = vn.getText(); - if (t >= 0) { - contributor.setCreditName(vn.toNormalizedString(t)); - contributors.add(nameIndex, contributor); - nameIndex++; + if (vn.toElement(VTDNav.FIRST_CHILD, "work:credit-name")) { + int val = vn.getText(); + if (val != -1) { + contributor.setCreditName(vn.toNormalizedString(val)); + } + vn.toElement(VTDNav.PARENT); } - } - if (contributors.size() == 0) { - return contributors; - } - - int sequenceIndex = 0; - ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-sequence"); - while (ap.evalXPath() != -1) { - int t = vn.getText(); - if (t >= 0) { - contributors.get(sequenceIndex).setSequence(vn.toNormalizedString(t)); - sequenceIndex++; - } - } - - int roleIndex = 0; - ap.selectXPath("//work:contributor/work:contributor-attributes/work:contributor-role"); - while (ap.evalXPath() != -1) { - int t = vn.getText(); - if (t >= 0) { - contributors.get(roleIndex).setRole(vn.toNormalizedString(t)); - roleIndex++; + if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-attributes")) { + if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-sequence")) { + int val = vn.getText(); + if (val != -1) { + 
contributor.setSequence(vn.toNormalizedString(val)); + } + vn.toElement(VTDNav.PARENT); + } + if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-role")) { + int val = vn.getText(); + if (val != -1) { + contributor.setRole(vn.toNormalizedString(val)); + } + vn.toElement(VTDNav.PARENT); + } + vn.toElement(VTDNav.PARENT); } + contributors.add(contributor); } return contributors; } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json new file mode 100644 index 000000000..c3a8f92ec --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json @@ -0,0 +1,7 @@ +[ + {"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the default work path", "paramRequired": true}, + {"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true}, + {"paramName":"ow", "paramLongName":"outputWorksPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true}, + {"paramName":"oew", "paramLongName":"outputEnrichedWorksPath", "paramDescription": "the relative folder of the sequencial file to write the data", 
"paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml deleted file mode 100644 index fe14bb8cb..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - hadoop-rm3.garr-pa1.d4science.org:8032 - - - nameNode - hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://hadoop-edge2.garr-pa1.d4science.org:9083 - - - spark2YarnHistoryServerAddress - http://hadoop-edge1.garr-pa1.d4science.org:18089/ - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml deleted file mode 100644 index 51e00dc0f..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - workingPath - the working dir base path - - - shell_cmd_0 - wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz - - the shell command that downloads and puts to hdfs orcid summaries - - - - - - - - Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - ${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))} - - - - - - - - ${jobTracker} - ${nameNode} - bash - -c - ${shell_cmd_0} - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidDSManager - -w${workingPath}/ - -n${nameNode} - -fORCID_2019_summaries.tar.gz - -osummaries/output/ - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml index 3068562d0..05fe6d014 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/config-default.xml @@ -9,7 +9,7 @@ oozie.launcher.mapreduce.map.java.opts - -Xmx4g + -Xmx2g jobTracker diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml index 8f9a5123e..ea4d33296 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_activities/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + workingPath @@ -6,70 +6,70 @@ shell_cmd_0 - wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz ; rm -f /tmp/ORCID_2019_activites_0.tar.gz + wget -O /tmp/ORCID_2020_10_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/25002232 ; hdfs dfs -copyFromLocal 
/tmp/ORCID_2020_10_activites_0.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_0.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_0.tar.gz the shell command that downloads and puts to hdfs orcid activity file 0 shell_cmd_1 - wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz ; rm -f /tmp/ORCID_2019_activites_1.tar.gz + wget -O /tmp/ORCID_2020_10_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/25002088 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_1.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_1.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_1.tar.gz the shell command that downloads and puts to hdfs orcid activity file 1 shell_cmd_2 - wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz ; rm -f /tmp/ORCID_2019_activites_2.tar.gz + wget -O /tmp/ORCID_2020_10_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/25000596 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_2.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_2.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_2.tar.gz the shell command that downloads and puts to hdfs orcid activity file 2 shell_cmd_3 - wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz ; rm -f /tmp/ORCID_2019_activites_3.tar.gz + wget -O /tmp/ORCID_2020_10_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/25015150 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_3.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_3.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_3.tar.gz the shell 
command that downloads and puts to hdfs orcid activity file 3 - + shell_cmd_4 - wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz ; rm -f /tmp/ORCID_2019_activites_4.tar.gz + wget -O /tmp/ORCID_2020_10_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/25033643 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_4.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_4.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_4.tar.gz the shell command that downloads and puts to hdfs orcid activity file 4 - + shell_cmd_5 - wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz ; rm -f /tmp/ORCID_2019_activites_5.tar.gz + wget -O /tmp/ORCID_2020_10_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/25005483 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_5.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_5.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_5.tar.gz the shell command that downloads and puts to hdfs orcid activity file 5 - + shell_cmd_6 - wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz ; rm -f /tmp/ORCID_2019_activites_6.tar.gz + wget -O /tmp/ORCID_2020_10_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/25005425 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_6.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_6.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_6.tar.gz the shell command that downloads and puts to hdfs orcid activity file 6 shell_cmd_7 - wget -O /tmp/ORCID_2019_activites_7.tar.gz 
https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz ; rm -f /tmp/ORCID_2019_activites_7.tar.gz + wget -O /tmp/ORCID_2020_10_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/25012016 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_7.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_7.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_7.tar.gz the shell command that downloads and puts to hdfs orcid activity file 7 shell_cmd_8 - wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz ; rm -f /tmp/ORCID_2019_activites_8.tar.gz + wget -O /tmp/ORCID_2020_10_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/25012079 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_8.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_8.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_8.tar.gz the shell command that downloads and puts to hdfs orcid activity file 8 shell_cmd_9 - wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz ; rm -f /tmp/ORCID_2019_activites_9.tar.gz + wget -O /tmp/ORCID_2020_10_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/25010727 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_9.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_9.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_9.tar.gz the shell command that downloads and puts to hdfs orcid activity file 9 - + shell_cmd_X - wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz 
/data/orcid_activities/ORCID_2019_activites_X.tar.gz ; rm -f /tmp/ORCID_2019_activites_X.tar.gz + wget -O /tmp/ORCID_2020_10_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/25011025 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_activites_X.tar.gz /data/orcid_activities_2020/ORCID_2020_10_activites_X.tar.gz ; rm -f /tmp/ORCID_2020_10_activites_X.tar.gz the shell command that downloads and puts to hdfs orcid activity file X - + @@ -82,11 +82,11 @@ - + - + @@ -102,8 +102,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_0.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_0.tar.gz'))} @@ -118,7 +118,7 @@ ${shell_cmd_0} - + @@ -129,7 +129,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_0.tar.gz + -fORCID_2020_10_activites_0.tar.gz -owno_doi_works/works_0.seq -oewno_doi_enriched_works/ @@ -139,8 +139,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_1.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_1.tar.gz'))} @@ -155,7 +155,7 @@ ${shell_cmd_1} - + @@ -166,7 +166,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_1.tar.gz + -fORCID_2020_10_activites_1.tar.gz -owno_doi_works/works_1.seq -oewno_doi_enriched_works/ @@ -176,8 +176,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_2.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_2.tar.gz'))} @@ -192,7 +192,7 @@ ${shell_cmd_2} - + @@ -203,7 +203,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_2.tar.gz + -fORCID_2020_10_activites_2.tar.gz -owno_doi_works/works_2.seq -oewno_doi_enriched_works/ @@ -213,8 +213,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_3.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_3.tar.gz'))} @@ -229,7 +229,7 @@ ${shell_cmd_3} - + @@ -240,7 +240,7 @@ 
eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_3.tar.gz + -fORCID_2020_10_activites_3.tar.gz -owno_doi_works/works_3.seq -oewno_doi_enriched_works/ @@ -250,8 +250,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_4.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_4.tar.gz'))} @@ -266,7 +266,7 @@ ${shell_cmd_4} - + @@ -277,7 +277,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_4.tar.gz + -fORCID_2020_10_activites_4.tar.gz -owno_doi_works/works_4.seq -oewno_doi_enriched_works/ @@ -287,8 +287,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_5.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_5.tar.gz'))} @@ -303,7 +303,7 @@ ${shell_cmd_5} - + @@ -314,7 +314,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_5.tar.gz + -fORCID_2020_10_activites_5.tar.gz -owno_doi_works/works_5.seq -oewno_doi_enriched_works/ @@ -324,8 +324,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_6.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_6.tar.gz'))} @@ -340,7 +340,7 @@ ${shell_cmd_6} - + @@ -351,7 +351,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_6.tar.gz + -fORCID_2020_10_activites_6.tar.gz -owno_doi_works/works_6.seq -oewno_doi_enriched_works/ @@ -362,8 +362,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_7.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_7.tar.gz'))} @@ -378,7 +378,7 @@ ${shell_cmd_7} - + @@ -389,7 +389,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_7.tar.gz + -fORCID_2020_10_activites_7.tar.gz -owno_doi_works/works_7.seq -oewno_doi_enriched_works/ @@ -399,8 +399,8 @@ - - 
${fs:exists(concat(workingPath,'/ORCID_2019_activites_8.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_8.tar.gz'))} @@ -415,7 +415,7 @@ ${shell_cmd_8} - + @@ -426,7 +426,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_8.tar.gz + -fORCID_2020_10_activites_8.tar.gz -owno_doi_works/works_8.seq -oewno_doi_enriched_works/ @@ -436,8 +436,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_9.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_9.tar.gz'))} @@ -452,7 +452,7 @@ ${shell_cmd_9} - + @@ -463,7 +463,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_9.tar.gz + -fORCID_2020_10_activites_9.tar.gz -owno_doi_works/works_9.seq -oewno_doi_enriched_works/ @@ -473,8 +473,8 @@ - - ${fs:exists(concat(workingPath,'/ORCID_2019_activites_X.tar.gz'))} + + ${fs:exists(concat(workingPath,'/ORCID_2020_10_activites_X.tar.gz'))} @@ -489,7 +489,7 @@ ${shell_cmd_X} - + @@ -500,7 +500,7 @@ eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork -w${workingPath}/ -n${nameNode} - -fORCID_2019_activites_X.tar.gz + -fORCID_2020_10_activites_X.tar.gz -owno_doi_works/works_X.seq -oewno_doi_enriched_works/ @@ -508,7 +508,35 @@ + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml index e77dd09c9..e1829e847 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml @@ -19,4 +19,8 @@ oozie.launcher.mapreduce.user.classpath.first true + + 
oozie.launcher.mapreduce.map.java.opts + -Xmx16g + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml index 3362cc67b..8517f35ee 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + workingPath @@ -6,7 +6,7 @@ shell_cmd_0 - wget -O /tmp/ORCID_2019_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/18017633 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_summaries.tar.gz /data/orcid_activities/ORCID_2019_summaries.tar.gz ; rm -f /tmp/ORCID_2019_summaries.tar.gz + wget -O /tmp/ORCID_2020_10_summaries.tar.gz https://orcid.figshare.com/ndownloader/files/25032905 ; hdfs dfs -copyFromLocal /tmp/ORCID_2020_10_summaries.tar.gz /data/orcid_activities_2020/ORCID_2020_10_summaries.tar.gz ; rm -f /tmp/ORCID_2020_10_summaries.tar.gz the shell command that downloads and puts to hdfs orcid summaries @@ -21,8 +21,8 @@ - - + + @@ -31,7 +31,7 @@ - ${fs:exists(concat(workingPath,'/ORCID_2019_summaries.tar.gz'))} + ${fs:exists(concat(workingPath,'/ORCID_2020_10_summaries.tar.gz'))} @@ -57,8 +57,8 @@ eu.dnetlib.doiboost.orcid.OrcidDSManager -w${workingPath}/ -n${nameNode} - -fORCID_2019_summaries.tar.gz - -osummaries/output/ + -fORCID_2020_10_summaries.tar.gz + -oauthors/ diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml similarity index 95% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml rename to 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index faed3104a..6cec48a6d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_enriched_orcid_works/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -59,7 +59,7 @@ - + @@ -85,7 +85,7 @@ -n${nameNode} -f- -owno_doi_works/ - -oewno_doi_enriched_works/output + -oewno_doi_dataset diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 5e0f91ecd..774475626 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -38,8 +38,8 @@ public class OrcidClientTest { @Test public void downloadTest() throws Exception { - String record = testDownloadRecord("0000-0002-2536-4498"); - File f = new File("/tmp/downloaded_0000-0002-2536-4498.xml"); + String record = testDownloadRecord("0000-0001-6163-2042"); + File f = new File("/tmp/downloaded_0000-0001-6163-2042.xml"); OutputStream outStream = new FileOutputStream(f); IOUtils.write(record.getBytes(), outStream); System.out.println("saved to tmp"); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index bf5aba99b..fa2980ac4 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -2,15 +2,20 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static 
org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.text.Normalizer; import java.util.*; +import javax.validation.constraints.AssertTrue; + import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaccardSimilarity; import org.apache.commons.text.similarity.JaroWinklerSimilarity; import org.junit.jupiter.api.Test; +import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +46,6 @@ public class OrcidNoDoiTest { String orcidIdA = "0000-0003-2760-1191"; @Test -// @Ignore public void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); @@ -95,8 +99,7 @@ public class OrcidNoDoiTest { } @Test -// @Ignore - private void authorMatchTest() throws Exception { + public void authorMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -121,9 +124,60 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } assertNotNull(workData); + + Contributor a = workData.getContributors().get(0); + assertTrue(a.getCreditName().equals("Abdel-Dayem K")); + AuthorMatcher.match(author, workData.getContributors()); GsonBuilder builder = new GsonBuilder(); Gson gson = builder.create(); logger.info(gson.toJson(workData)); + + assertTrue(workData.getContributors().size() == 6); + Contributor c = workData.getContributors().get(0); + assertTrue(c.getOid().equals("0000-0003-2760-1191")); + assertTrue(c.getName().equals("Khairy")); + assertTrue(c.getSurname().equals("Abdel Dayem")); + assertTrue(c.getCreditName().equals("Abdel-Dayem K")); + } + + @Test + public void readContributorsTest() + throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, 
ParseException { + logger.info("running loadPublicationFieldsTest ...."); + String xml = IOUtils + .toString( + OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml")); + + if (xml == null) { + logger.info("Resource not found"); + } + XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); + if (p == null) { + logger.info("XMLRecordParserNoDoi null"); + } + WorkDataNoDoi workData = null; + try { + workData = p.VTDParseWorkData(xml.getBytes()); + } catch (Exception e) { + logger.error("parsing xml", e); + } + assertNotNull(workData.getContributors()); + assertTrue(workData.getContributors().size() == 5); + assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName())); + assertTrue(workData.getContributors().get(0).getSequence().equals("seq0")); + assertTrue(workData.getContributors().get(0).getRole().equals("role0")); + assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1")); + assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence())); + assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole())); + assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2")); + assertTrue(workData.getContributors().get(2).getSequence().equals("seq2")); + assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole())); + assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3")); + assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence())); + assertTrue(workData.getContributors().get(3).getRole().equals("role3")); + assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName())); + assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); + assertTrue(workData.getContributors().get(4).getRole().equals("role4")); } } diff --git 
a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191_contributors.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191_contributors.xml new file mode 100644 index 000000000..26e64aeda --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191_contributors.xml @@ -0,0 +1,101 @@ + + + 2016-12-12T23:02:05.233Z + 2016-12-13T09:08:16.412Z + + + https://orcid.org/0000-0002-9157-3431 + 0000-0002-9157-3431 + orcid.org + + Europe PubMed Central + + + Cutoff Value of Admission N-Terminal Pro-Brain Natriuretic Peptide Which + Predicts Poor Myocardial Perfusion after Primary Percutaneous Coronary Intervention for + ST-Segment-Elevation Myocardial Infarction. + + + formatted-unspecified + Abdel-Dayem K, Eweda II, El-Sherbiny A, Dimitry MO, Nammas W, Acta + Cardiologica Sinica, 2016, vol. 32, no. 6, pp. 
649-655, 2016 + + journal-article + + 2016 + 11 + + + + pmid + 27899851 + 27899851 + self + + + pmc + PMC5126442 + PMC5126442 + self + + + http://europepmc.org/abstract/med/27899851 + + + + seq0 + role0 + + + + creditname1 + + + creditname2 + + seq2 + + + + + creditname3 + + + role3 + + + + + + seq4 + role4 + + + + diff --git a/pom.xml b/pom.xml index d64de01ac..3629e2f1b 100644 --- a/pom.xml +++ b/pom.xml @@ -458,6 +458,18 @@ ${jsonschemagenerator.version} + + org.apache.commons + commons-text + ${common.text.version} + + + + org.apache.httpcomponents + httpclient + ${org.apache.httpcomponents.version} + + From 1513174d7ec367222c063ba47095ee7ca4897e99 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 10 Nov 2020 11:44:55 +0100 Subject: [PATCH 025/108] added further test case --- .../SparkGenEnrichedOrcidWorks.java | 2 +- .../orcidnodoi/similarity/AuthorMatcher.java | 50 +++-- .../orcidnodoi/xml/OrcidNoDoiTest.java | 181 ++++++++++++++++-- .../xml/activity_work_0000-0003-2760-1191.xml | 2 +- 4 files changed, 202 insertions(+), 33 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 691ca3eee..40cd212da 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -96,7 +96,7 @@ public class SparkGenEnrichedOrcidWorks { Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .filter(Objects::nonNull) .toJavaRDD(); -// enrichedWorksRDD.saveAsTextFile(workingPath + outputEnrichedWorksPath); + enrichedWorksRDD.saveAsTextFile(workingPath + "enrichedWorksText/"); logger.info("Enriched works RDD ready."); final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications"); diff 
--git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index 6a1468f4c..2f86820fb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -33,7 +33,7 @@ import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; public class AuthorMatcher { private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); - private static final Double threshold = 0.8; + public static final Double threshold = 0.8; public static void match(AuthorData author, List contributors) throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { @@ -41,16 +41,35 @@ public class AuthorMatcher { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); Contributor contributor = null; - contributors.stream().filter(c -> !StringUtils.isBlank(c.getCreditName())).forEach(c -> { - if (simpleMatch(c.getCreditName(), author.getName()) || - simpleMatch(c.getCreditName(), author.getSurname()) || - simpleMatch(c.getCreditName(), author.getOtherName())) { - matchCounters.set(0, matchCounters.get(0) + 1); - c.setSimpleMatch(true); - } - }); + contributors + .stream() + .filter(c -> !StringUtils.isBlank(c.getCreditName())) + .forEach(c -> { + if (simpleMatch(c.getCreditName(), author.getName()) || + simpleMatch(c.getCreditName(), author.getSurname()) || + simpleMatch(c.getCreditName(), author.getOtherName())) { + matchCounters.set(0, matchCounters.get(0) + 1); + c.setSimpleMatch(true); + } + }); if (matchCounters.get(0) == 1) { updateAuthorsSimpleMatch(contributors, author); + } else if (matchCounters.get(0) == 0) { + Optional optCon = contributors + .stream() + .filter(c -> 
!StringUtils.isBlank(c.getCreditName())) + .map(c -> { + c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); + return c; + }) + .filter(c -> c.getScore() >= threshold) + .max(Comparator.comparing(c -> c.getScore())); + Contributor bestMatchContributor = null; + if (optCon.isPresent()) { + bestMatchContributor = optCon.get(); + bestMatchContributor.setBestMatch(true); + updateAuthorsSimilarityMatch(contributors, author); + } } else if (matchCounters.get(0) > 1) { Optional optCon = contributors .stream() @@ -68,19 +87,18 @@ public class AuthorMatcher { bestMatchContributor.setBestMatch(true); updateAuthorsSimilarityMatch(contributors, author); } - } } - private static boolean simpleMatch(String name, String searchValue) { + public static boolean simpleMatch(String name, String searchValue) { if (searchValue == null) { return false; } return normalize(name).contains(normalize(searchValue)); } - private static Double bestMatch(String authorSurname, String authorName, String contributor) { + public static Double bestMatch(String authorSurname, String authorName, String contributor) { String[] contributorSplitted = contributor.split(" "); if (contributorSplitted.length == 0) { return 0.0; @@ -106,7 +124,7 @@ public class AuthorMatcher { return sm2; } - private static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { + public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); return score; } @@ -115,7 +133,7 @@ public class AuthorMatcher { return new JaroWinklerSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB))); } - private static String normalize(final String s) { + public static String normalize(final String s) { if (s == null) { return new String(""); } @@ -140,7 +158,7 @@ public class AuthorMatcher { return surname + " " + name; } - private static void 
updateAuthorsSimpleMatch(List contributors, AuthorData author) { + public static void updateAuthorsSimpleMatch(List contributors, AuthorData author) { contributors.forEach(c -> { if (c.isSimpleMatch()) { c.setName(author.getName()); @@ -151,7 +169,7 @@ public class AuthorMatcher { updateRanks(contributors); } - private static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { + public static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { contributors .stream() .filter(c -> c.isBestMatch()) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index fa2980ac4..c2c4ed5e1 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -38,12 +38,9 @@ public class OrcidNoDoiTest { private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class); - String nameA = "Khairy"; - String surnameA = "Abdel Dayem"; - String otherNameA = "Dayem MKA"; - String nameB = "K"; - String surnameB = "Abdel-Dayem"; - String orcidIdA = "0000-0003-2760-1191"; + static String nameA = "Khairy"; + static String surnameA = "Abdel Dayem"; + static String orcidIdA = "0000-0003-2760-1191"; @Test public void readPublicationFieldsTest() @@ -99,7 +96,7 @@ public class OrcidNoDoiTest { } @Test - public void authorMatchTest() throws Exception { + public void authorDoubleMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -129,16 +126,8 @@ public class OrcidNoDoiTest { assertTrue(a.getCreditName().equals("Abdel-Dayem K")); AuthorMatcher.match(author, workData.getContributors()); - GsonBuilder 
builder = new GsonBuilder(); - Gson gson = builder.create(); - logger.info(gson.toJson(workData)); assertTrue(workData.getContributors().size() == 6); - Contributor c = workData.getContributors().get(0); - assertTrue(c.getOid().equals("0000-0003-2760-1191")); - assertTrue(c.getName().equals("Khairy")); - assertTrue(c.getSurname().equals("Abdel Dayem")); - assertTrue(c.getCreditName().equals("Abdel-Dayem K")); } @Test @@ -180,4 +169,166 @@ public class OrcidNoDoiTest { assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); assertTrue(workData.getContributors().get(4).getRole().equals("role4")); } + + @Test + public void authorSimpleMatchTest() throws Exception { + String orcidWork = "activity_work_0000-0002-5982-8983.xml"; + AuthorData author = new AuthorData(); + author.setName("Parkhouse"); + author.setSurname("H."); + author.setOid("0000-0002-5982-8983"); + String xml = IOUtils + .toString( + OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); + + if (xml == null) { + logger.info("Resource not found"); + } + XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); + if (p == null) { + logger.info("XMLRecordParserNoDoi null"); + } + WorkDataNoDoi workData = null; + try { + workData = p.VTDParseWorkData(xml.getBytes()); + } catch (Exception e) { + logger.error("parsing xml", e); + } + assertNotNull(workData); + + Contributor a = workData.getContributors().get(0); + assertTrue(a.getCreditName().equals("Parkhouse, H.")); + + AuthorMatcher.match(author, workData.getContributors()); + + assertTrue(workData.getContributors().size() == 2); + Contributor c = workData.getContributors().get(0); + assertTrue(c.getOid().equals("0000-0002-5982-8983")); + assertTrue(c.getName().equals("Parkhouse")); + assertTrue(c.getSurname().equals("H.")); + assertTrue(c.getCreditName().equals("Parkhouse, H.")); + } + + @Test + public void match() { + + AuthorData author = new AuthorData(); + author.setName("Joe"); + author.setSurname("Dodge"); + 
author.setOid("0000-1111-2222-3333"); + Contributor contributor = new Contributor(); + contributor.setCreditName("Joe Dodge"); + List contributors = Arrays.asList(contributor); + AuthorMatcher am = new AuthorMatcher(); + int matchCounter = 0; + List matchCounters = Arrays.asList(matchCounter); + contributors + .stream() + .filter(c -> !StringUtils.isBlank(c.getCreditName())) + .forEach(c -> { + if (am.simpleMatch(c.getCreditName(), author.getName()) || + am.simpleMatch(c.getCreditName(), author.getSurname()) || + am.simpleMatch(c.getCreditName(), author.getOtherName())) { + matchCounters.set(0, matchCounters.get(0) + 1); + c.setSimpleMatch(true); + } + }); + + assertTrue(matchCounters.get(0) == 1); + am.updateAuthorsSimpleMatch(contributors, author); + assertTrue(contributors.get(0).getName().equals("Joe")); + assertTrue(contributors.get(0).getSurname().equals("Dodge")); + assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); + assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + AuthorData authorX = new AuthorData(); + authorX.setName(nameA); + authorX.setSurname(surnameA); + authorX.setOid(orcidIdA); + Contributor contributorA = new Contributor(); + contributorA.setCreditName("Abdel-Dayem Khai"); + Contributor contributorB = new Contributor(); + contributorB.setCreditName("Abdel-Dayem Fake"); + List contributorList = new ArrayList<>(); + contributorList.add(contributorA); + contributorList.add(contributorB); + int matchCounter2 = 0; + List matchCounters2 = Arrays.asList(matchCounter2); + contributorList + .stream() + .filter(c -> !StringUtils.isBlank(c.getCreditName())) + .forEach(c -> { + if (am.simpleMatch(c.getCreditName(), authorX.getName()) || + am.simpleMatch(c.getCreditName(), authorX.getSurname()) || + am.simpleMatch(c.getCreditName(), authorX.getOtherName())) { + int currentCounter = matchCounters2.get(0); + currentCounter += 1; + matchCounters2.set(0, currentCounter); + c.setSimpleMatch(true); + } + }); + + 
assertTrue(matchCounters2.get(0) == 2); + assertTrue(contributorList.get(0).isSimpleMatch()); + assertTrue(contributorList.get(1).isSimpleMatch()); + + Optional optCon = contributorList + .stream() + .filter(c -> c.isSimpleMatch()) + .filter(c -> !StringUtils.isBlank(c.getCreditName())) + .map(c -> { + c.setScore(am.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); + return c; + }) + .filter(c -> c.getScore() >= AuthorMatcher.threshold) + .max(Comparator.comparing(c -> c.getScore())); + assertTrue(optCon.isPresent()); + + final Contributor bestMatchContributor = optCon.get(); + bestMatchContributor.setBestMatch(true); + assertTrue(bestMatchContributor.getCreditName().equals("Abdel-Dayem Khai")); + assertTrue(contributorList.get(0).isBestMatch()); + assertTrue(!contributorList.get(1).isBestMatch()); + am.updateAuthorsSimilarityMatch(contributorList, authorX); + assertTrue(contributorList.get(0).getName().equals(nameA)); + assertTrue(contributorList.get(0).getSurname().equals(surnameA)); + assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); + assertTrue(contributorList.get(0).getOid().equals(orcidIdA)); + assertTrue(StringUtils.isBlank(contributorList.get(1).getOid())); + } + + @Test + public void authorBestMatchTest() throws Exception { + String name = "Khairy"; + String surname = "Abdel Dayem"; + String orcidWork = "activity_work_0000-0003-2760-1191.xml"; + AuthorData author = new AuthorData(); + author.setName(name); + author.setSurname(surname); + author.setOid(orcidIdA); + String xml = IOUtils + .toString( + OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); + + if (xml == null) { + logger.info("Resource not found"); + } + XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); + if (p == null) { + logger.info("XMLRecordParserNoDoi null"); + } + WorkDataNoDoi workData = null; + try { + workData = p.VTDParseWorkData(xml.getBytes()); + } catch (Exception e) { + logger.error("parsing xml", e); + } + 
AuthorMatcher.match(author, workData.getContributors()); + assertTrue(workData.getContributors().size() == 5); + List c = workData.getContributors(); + assertTrue(c.get(0).getName().equals(name)); + assertTrue(c.get(0).getSurname().equals(surname)); + assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); + assertTrue(c.get(0).getOid().equals(orcidIdA)); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml index 485f4f8e8..83752b145 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcidnodoi/xml/activity_work_0000-0003-2760-1191.xml @@ -68,7 +68,7 @@ http://europepmc.org/abstract/med/27899851 - Abdel-Dayem K + Khair Abde Daye first author From 1f861f2b0de77d6a693c5d5144696409c32592a7 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Wed, 11 Nov 2020 17:38:50 +0100 Subject: [PATCH 026/108] now wf output is a sequence file with the format seq("eu.dnetlib.dhp.schema.oaf.Publication",eu.dnetlib.dhp.schema.action.AtomicActions) --- .../SparkGenEnrichedOrcidWorks.java | 62 ++++++++++--------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 40cd212da..7f715fa7d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -1,14 +1,21 @@ package eu.dnetlib.doiboost.orcidnodoi; -import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.IOException; -import java.util.Objects; -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.doiboost.orcid.json.JsonHelper; +import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; +import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; @@ -16,24 +23,17 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.doiboost.orcid.json.JsonHelper; -import eu.dnetlib.doiboost.orcid.model.AuthorData; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; -import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; +import java.io.IOException; +import java.util.Objects; +import java.util.Optional; + +import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + /** * This spark job generates one parquet file, containing orcid publications dataset */ @@ -42,6 +42,8 @@ public class SparkGenEnrichedOrcidWorks { static Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static void main(String[] args) throws IOException, Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -58,6 +60,7 @@ public class SparkGenEnrichedOrcidWorks { final String workingPath = parser.get("workingPath"); final String outputEnrichedWorksPath = parser.get("outputEnrichedWorksPath"); final String outputWorksPath = parser.get("outputWorksPath"); + final String hdfsServerUri = parser.get("hdfsServerUri"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -96,7 +99,6 @@ public class SparkGenEnrichedOrcidWorks { Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .filter(Objects::nonNull) .toJavaRDD(); - enrichedWorksRDD.saveAsTextFile(workingPath + "enrichedWorksText/"); logger.info("Enriched works RDD ready."); final LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications"); @@ -124,15 +126,17 @@ public class SparkGenEnrichedOrcidWorks { }) .filter(p -> p != null); - Dataset publicationDataset = spark - .createDataset( - oafPublicationRDD.repartition(1).rdd(), - Encoders.bean(Publication.class)); - publicationDataset - .write() - .format("parquet") - .mode(SaveMode.Overwrite) - .save(workingPath + outputEnrichedWorksPath); + oafPublicationRDD + .mapToPair( + p -> new Tuple2<>(p.getClass().toString(), + OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, (Publication) p)))) + .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .saveAsNewAPIHadoopFile( + workingPath.concat(outputEnrichedWorksPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + 
sc.hadoopConfiguration()); logger.info("parsedPublications: " + parsedPublications.value().toString()); logger.info("enrichedPublications: " + enrichedPublications.value().toString()); From 13f28fa225d248e080c13d99694c3069826b3184 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 12 Nov 2020 17:43:32 +0100 Subject: [PATCH 027/108] moved AuthorData to dhp-schemas; added other names to author data --- .../dnetlib/dhp/schema/orcid}/AuthorData.java | 18 +- .../orcid/SparkGenerateDoiAuthorList.java | 5 +- .../doiboost/orcid/SummariesDecompressor.java | 7 +- .../doiboost/orcid/xml/XMLRecordParser.java | 8 +- .../SparkGenEnrichedOrcidWorks.java | 38 +- .../doiboost/orcidnodoi/json/JsonWriter.java | 19 +- .../orcidnodoi/model/Contributor.java | 2 +- .../orcidnodoi/similarity/AuthorMatcher.java | 11 +- .../oozie_app/config-default.xml | 2 +- .../orcid/xml/XMLRecordParserTest.java | 21 +- .../orcidnodoi/xml/OrcidNoDoiTest.java | 12 +- .../orcid/xml/record_8888-8888-8888-8880.xml | 770 ++++++++++++++++++ .../summary_0000-0001-5109-1000_othername.xml | 196 +++++ 13 files changed, 1053 insertions(+), 56 deletions(-) rename {dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid}/AuthorData.java (71%) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_0000-0001-5109-1000_othername.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java similarity index 71% rename from dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java index e0624509b..6c94cdb13 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/AuthorData.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java @@ -1,7 +1,10 @@ -package eu.dnetlib.doiboost.orcid.model; +package eu.dnetlib.dhp.schema.orcid; import java.io.Serializable; +import java.util.List; + +import com.google.common.collect.Lists; /** * This class models the data that are retrieved from orcid publication @@ -13,8 +16,8 @@ public class AuthorData implements Serializable { private String name; private String surname; private String creditName; - private String otherName; private String errorCode; + private List otherNames; public String getErrorCode() { return errorCode; @@ -56,11 +59,14 @@ public class AuthorData implements Serializable { this.oid = oid; } - public String getOtherName() { - return otherName; + public List getOtherNames() { + return otherNames; } - public void setOtherName(String otherName) { - this.otherName = otherName; + public void setOtherNames(List otherNames) { + if (this.otherNames == null) { + this.otherNames = Lists.newArrayList(); + } + this.otherNames = otherNames; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index b4239bba2..011c153ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -13,9 +13,6 @@ import java.util.stream.Stream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import 
org.apache.spark.api.java.JavaRDD; @@ -33,7 +30,7 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.model.WorkData; import scala.Tuple2; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index 29d72ed0b..d1b2a1d73 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -19,7 +19,7 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.mortbay.log.Log; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; @@ -56,6 +56,7 @@ public class SummariesDecompressor { int nameFound = 0; int surnameFound = 0; int creditNameFound = 0; + int otherNamesFound = 0; int errorFromOrcidFound = 0; int xmlParserErrorFound = 0; try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) { @@ -117,6 +118,9 @@ public class SummariesDecompressor { if (authorData.getCreditName() != null) { creditNameFound += 1; } + if (authorData.getOtherNames() != null && authorData.getOtherNames().size() > 1) { + otherNamesFound += authorData.getOtherNames().size(); + } } else { Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString()); @@ -152,6 +156,7 @@ public class SummariesDecompressor { Log.info("Name found: " + nameFound); Log.info("Surname found: " + surnameFound); Log.info("Credit name found: " + creditNameFound); + 
Log.info("Other names found: " + otherNamesFound); Log.info("Error from Orcid found: " + errorFromOrcidFound); Log.info("Error parsing xml record found: " + xmlParserErrorFound); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index 2e43f4d3e..a807cf132 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -14,7 +14,7 @@ import com.ximpleware.VTDNav; import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.parser.utility.VtdUtilityParser; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.model.WorkData; public class XMLRecordParser { @@ -81,6 +81,12 @@ public class XMLRecordParser { if (!creditNames.isEmpty()) { authorData.setCreditName(creditNames.get(0)); } + + final List otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content"); + if (!otherNames.isEmpty()) { + authorData.setOtherNames(otherNames); + } + return authorData; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 7f715fa7d..cc65b0b4f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -1,18 +1,12 @@ package eu.dnetlib.doiboost.orcidnodoi; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; -import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.doiboost.orcid.json.JsonHelper; -import eu.dnetlib.doiboost.orcid.model.AuthorData; -import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; -import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; @@ -26,14 +20,22 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonParser; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.orcid.AuthorData; +import eu.dnetlib.doiboost.orcid.json.JsonHelper; +import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; +import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; +import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; import scala.Tuple2; -import java.io.IOException; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - /** * This spark job generates one parquet file, containing orcid publications dataset */ diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java 
index 363cb13e6..982fb6316 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -1,9 +1,12 @@ package eu.dnetlib.doiboost.orcidnodoi.json; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.JsonObject; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.model.WorkData; /** @@ -12,15 +15,11 @@ import eu.dnetlib.doiboost.orcid.model.WorkData; public class JsonWriter { - public static String create(AuthorData authorData) { - JsonObject author = new JsonObject(); - author.addProperty("oid", authorData.getOid()); - author.addProperty("name", authorData.getName()); - author.addProperty("surname", authorData.getSurname()); - if (authorData.getCreditName() != null) { - author.addProperty("creditname", authorData.getCreditName()); - } - return author.toString(); + public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .setSerializationInclusion(JsonInclude.Include.NON_NULL);; + + public static String create(AuthorData authorData) throws JsonProcessingException { + return OBJECT_MAPPER.writeValueAsString(authorData); } public static String create(WorkData workData) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java index 9a8651c85..9222c1cc4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/model/Contributor.java @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost.orcidnodoi.model; 
import java.io.Serializable; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; /** * This class models the data related to a contributor, that are retrieved from an orcid publication diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index 2f86820fb..c0f617868 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -18,7 +18,7 @@ import com.ximpleware.XPathEvalException; import com.ximpleware.XPathParseException; import eu.dnetlib.dhp.parser.utility.VtdException; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; @@ -47,7 +47,7 @@ public class AuthorMatcher { .forEach(c -> { if (simpleMatch(c.getCreditName(), author.getName()) || simpleMatch(c.getCreditName(), author.getSurname()) || - simpleMatch(c.getCreditName(), author.getOtherName())) { + simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) { matchCounters.set(0, matchCounters.get(0) + 1); c.setSimpleMatch(true); } @@ -91,6 +91,13 @@ public class AuthorMatcher { } + public static boolean simpleMatchOnOtherNames(String name, List otherNames) { + if (otherNames == null || (otherNames != null && otherNames.isEmpty())) { + return false; + } + return otherNames.stream().filter(o -> simpleMatch(name, o)).count() > 0; + } + public static boolean simpleMatch(String name, String searchValue) { if (searchValue == null) { return false; diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml index e1829e847..191654378 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_summaries/oozie_app/config-default.xml @@ -21,6 +21,6 @@ oozie.launcher.mapreduce.map.java.opts - -Xmx16g + -Xmx8g \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 5bf6f27b9..b7be5e5cd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -2,12 +2,14 @@ package eu.dnetlib.doiboost.orcid.xml; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.model.WorkData; +import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; public class XMLRecordParserTest { @@ -55,4 +57,21 @@ public class XMLRecordParserTest { assertNotNull(workData.getDoi()); System.out.println("doi: " + workData.getDoi()); } + + @Test + public void testOrcidOtherNamesXMLParser() throws Exception { + + String xml = IOUtils + .toString( + this.getClass().getResourceAsStream("summary_0000-0001-5109-1000_othername.xml")); + + XMLRecordParser p = new XMLRecordParser(); + + AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); + assertNotNull(authorData); + assertNotNull(authorData.getOtherNames()); + 
assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus")); + String jsonData = JsonWriter.create(authorData); + assertNotNull(jsonData); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index c2c4ed5e1..948e5b094 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -5,34 +5,24 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; -import java.text.Normalizer; import java.util.*; -import javax.validation.constraints.AssertTrue; - import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.similarity.JaccardSimilarity; -import org.apache.commons.text.similarity.JaroWinklerSimilarity; import org.junit.jupiter.api.Test; -import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; import com.ximpleware.NavException; import com.ximpleware.ParseException; import com.ximpleware.XPathEvalException; import com.ximpleware.XPathParseException; import eu.dnetlib.dhp.parser.utility.VtdException; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.doiboost.orcid.model.AuthorData; +import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcidnodoi.model.Contributor; import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; -import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidNoDoiTest { diff --git 
a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml new file mode 100644 index 000000000..7abc2f35a --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/record_8888-8888-8888-8880.xml @@ -0,0 +1,770 @@ + + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + zh_CN + + + API + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + true + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + 2001-12-31T12:00:00 + true + true + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + give-names + family-name + credit-name + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + other-name-1 + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + biography + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + url-name-1 + http://url.com/ + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + user1@email.com + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + US + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + keyword1 + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + type-1 + value-1 + http://url.com/1 + self + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + 
2001-12-31T12:00:00 + + + + agr + external-id-value + + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + distinction:department-name + distinction:role-title + + 1948 + 02 + 02 + + + 1948 + 02 + 02 + + + distinction-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-distinction + GRID + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + + agr + external-id-value + + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + education:department-name + education:role-title + + 1948 + 02 + 02 + + + 2019 + 01 + 01 + + + education-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-education + GRID + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + + agr + external-id-value + + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + employment:department-name + employment:role-title + + 1948 + 02 + 02 + + + 2025 + + + employment-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-employment + GRID + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + grant_number + external-id-value-1 + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + + common:title + common:translated-title + + + + grant_number + external-id-value-1 + http://tempuri.org + self + + + grant + + 1948 + 02 + 02 + + + 1948 + 02 + 02 + + + common:name + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-funding + FUNDREF + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + + agr + external-id-value + + 
http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + invited-position:department-name + invited-position:role-title + + 2019 + 01 + 01 + + + 2025 + 01 + 01 + + + invited-position-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-invited-position + GRID + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + + agr + external-id-value + + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + membership:department-name + membership:role-title + + 1948 + 02 + 02 + + + membership-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-membership + RINGGOLD + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + something + external-id-value + http://orcid.org + self + + + + 2001-12-31T12:00:00 + + + something + external-id-value + + http://orcid.org + self + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/client/APP-9999999999999901 + APP-9999999999999901 + orcid.org + + + + reviewer + + + something + external-id-value + http://orcid.org + self + + + http://orcid.org + review + + 1948 + 02 + 02 + + orcid-generated:12345 + + common:name + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-peer-review + RINGGOLD + + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + + agr + external-id-value + + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + qualification:department-name + qualification:role-title + + 1948 + 02 + 02 + + + 2025 + 12 + + + qualification-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-qualification + RINGGOLD + + + + + + + 
2017-01-18T15:06:05.147-06:00 + + 2017-01-18T15:03:56.856-06:00 + + + proposal_id + 123456 + self + + + + + 2015-06-25T16:01:12.718Z + 2017-09-08T13:31:19.987Z + + + + https://orcid.org/0000-0000-0000-0000 + 0000-0000-0000-0000 + orcid.org + + XSEDE ORCID integration + + + + + Giant Laser Award + + + + XSEDE + + city + region + US + + + XX + grid + + + + + + proposal_id + 123456 + self + + + + 1999 + 02 + 02 + + + 2012 + 02 + 02 + + http://xsede.org/GiantLaserAward + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + + agr + external-id-value + + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + service:department-name + service:role-title + + 1948 + 02 + 02 + + + service-org + + common:city + common:region + AF + + + common:disambiguated-organization-identifier-service + RINGGOLD + + + + + + + 2001-12-31T12:00:00 + + 2001-12-31T12:00:00 + + + agr + external-id-value + http://orcid.org + part-of + + + + 2001-12-31T12:00:00 + 2001-12-31T12:00:00 + + + https://orcid.org/client/8888-8888-8888-8880 + 8888-8888-8888-8880 + orcid.org + + + + + common:title + + common:translated-title + + + + agr + external-id-value + http://tempuri.org + self + + + artistic-performance + + 1948 + 02 + 02 + + Procedia Computer Science + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_0000-0001-5109-1000_othername.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_0000-0001-5109-1000_othername.xml new file mode 100644 index 000000000..43bc96b8c --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_0000-0001-5109-1000_othername.xml @@ -0,0 +1,196 @@ + + + + https://orcid.org/0000-0001-5109-1000 + 0000-0001-5109-1000 + orcid.org + + + en + + + Member-referred + 2019-05-01T13:04:57.507Z + 
2019-05-01T13:59:54.268Z + true + true + true + + + 2019-05-01T13:45:47.727Z + + 2019-05-01T13:04:57.507Z + 2019-05-01T13:04:57.740Z + Andrew + Porteus + + + 2019-05-01T13:44:57.072Z + + 2019-05-01T13:44:57.072Z + 2019-05-01T13:44:57.072Z + + + https://orcid.org/0000-0001-5109-1000 + 0000-0001-5109-1000 + orcid.org + + Andrew Porteus + + Andrew C. Porteus + + + + 2019-05-01T13:59:54.263Z + 2019-05-01T13:59:54.263Z + Retired Librarian + + + 2019-05-01T13:45:47.727Z + + 2019-05-01T13:45:47.727Z + 2019-05-01T13:45:47.727Z + + + https://orcid.org/0000-0001-5109-1000 + 0000-0001-5109-1000 + orcid.org + + Andrew Porteus + + Niagara Falls Poetry Project + http://niagarapoetry.ca + + + + + 2019-05-01T13:45:09.764Z + + 2019-05-01T13:45:09.764Z + 2019-05-01T13:45:09.764Z + + + https://orcid.org/0000-0001-5109-1000 + 0000-0001-5109-1000 + orcid.org + + Andrew Porteus + + CA + + + + + + + 2019-05-01T13:57:45.787Z + + + 2019-05-01T13:15:26.102Z + + 2019-05-01T13:15:26.102Z + + + 2019-05-01T13:15:26.102Z + 2019-05-01T13:15:26.102Z + + + https://orcid.org/0000-0001-5109-1000 + 0000-0001-5109-1000 + orcid.org + + Andrew Porteus + + Library Technician Diploma + + 1976 + 09 + + + 1978 + 05 + + + Niagara College + + Welland + ON + CA + + + 125147 + RINGGOLD + + + + + + + + + + + + 2019-05-01T13:19:49.021Z + + 2019-05-01T13:19:49.021Z + + + 2019-05-01T13:19:49.021Z + 2019-05-01T13:19:49.021Z + + + https://orcid.org/0000-0001-5109-1000 + 0000-0001-5109-1000 + orcid.org + + Andrew Porteus + + Communication, Film & Popular Culture + Master's Candidate + + 2018 + 09 + + + Brock University + + Saint Catharines + ON + CA + + + 7497 + RINGGOLD + + + + + + + \ No newline at end of file From 9a2fa9dc2f45f030ca358621ddbbd51a4be3bf2c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 13 Nov 2020 10:25:34 +0100 Subject: [PATCH 028/108] added test for other names parsing from summaries dump --- .../orcidnodoi/xml/OrcidNoDoiTest.java | 28 +++++++++++++++++-- 1 file changed, 26 
insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 948e5b094..1f77197ab 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -13,6 +13,7 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; import com.ximpleware.NavException; import com.ximpleware.ParseException; import com.ximpleware.XPathEvalException; @@ -218,7 +219,7 @@ public class OrcidNoDoiTest { .forEach(c -> { if (am.simpleMatch(c.getCreditName(), author.getName()) || am.simpleMatch(c.getCreditName(), author.getSurname()) || - am.simpleMatch(c.getCreditName(), author.getOtherName())) { + am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) { matchCounters.set(0, matchCounters.get(0) + 1); c.setSimpleMatch(true); } @@ -250,7 +251,7 @@ public class OrcidNoDoiTest { .forEach(c -> { if (am.simpleMatch(c.getCreditName(), authorX.getName()) || am.simpleMatch(c.getCreditName(), authorX.getSurname()) || - am.simpleMatch(c.getCreditName(), authorX.getOtherName())) { + am.simpleMatchOnOtherNames(c.getCreditName(), author.getOtherNames())) { int currentCounter = matchCounters2.get(0); currentCounter += 1; matchCounters2.set(0, currentCounter); @@ -321,4 +322,27 @@ public class OrcidNoDoiTest { assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); assertTrue(c.get(0).getOid().equals(orcidIdA)); } + + @Test + public void otherNamesMatchTest() + throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException { + + AuthorData author = new AuthorData(); + author.setName("Joe"); + author.setSurname("Dodge"); + 
author.setOid("0000-1111-2222-3333"); + String otherName1 = new String("Joe Dr. Dodge"); + String otherName2 = new String("XY"); + List others = Lists.newArrayList(); + others.add(otherName1); + others.add(otherName2); + author.setOtherNames(others); + Contributor contributor = new Contributor(); + contributor.setCreditName("XY"); + List contributors = Arrays.asList(contributor); + AuthorMatcher.match(author, contributors); + assertTrue(contributors.get(0).getName().equals("Joe")); + assertTrue(contributors.get(0).getSurname().equals("Dodge")); + assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + } } From 005f849674c93f44e9a3e66b86211dd8f38f8919 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Fri, 13 Nov 2020 12:45:31 +0100 Subject: [PATCH 029/108] added compression to output dataset --- .../dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index cc65b0b4f..a92d534d8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -128,6 +128,8 @@ public class SparkGenEnrichedOrcidWorks { }) .filter(p -> p != null); + sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); + oafPublicationRDD .mapToPair( p -> new Tuple2<>(p.getClass().toString(), From c0c2e05eae56c3dad6e111177d88f1959b654d2e Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 17 Nov 2020 18:23:12 +0100 Subject: [PATCH 030/108] added wf to extracting authors and works xml data from orcid dump to hdfs; added wf to download the lamda file (containing last orcid update informations) from orcid to hdfs --- 
.../orcid/ActivitiesDecompressor.java | 61 +++++ .../orcid/ExtractXMLActivitiesData.java | 54 ++++ .../orcid/ExtractXMLSummariesData.java | 56 +++++ .../doiboost/orcid/SummariesDecompressor.java | 64 +++++ .../doiboost/orcid/xml/XMLRecordParser.java | 31 +++ .../orcid_download/oozie_app/workflow.xml | 45 ---- .../oozie_app/workflow.xml | 232 ++++++++++++++++++ .../oozie_app/config-default.xml | 26 ++ .../oozie_app/workflow.xml | 40 +++ .../oozie_app/config-default.xml | 0 .../oozie_app/workflow.xml | 64 +++++ 11 files changed, 628 insertions(+), 45 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{orcid_download => orcid_updates_download}/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index 02d2b267b..420c363ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -17,6 +17,7 @@ import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; import eu.dnetlib.doiboost.orcid.model.WorkData; @@ -143,4 +144,64 @@ public class ActivitiesDecompressor { Log.info("Error from Orcid found: " + errorFromOrcidFound); Log.info("Error parsing xml work found: " + xmlParserErrorFound); } + + public static void extractXML(Configuration conf, String inputUri, Path outputPath) + throws Exception { + String uri = inputUri; + FileSystem fs = FileSystem.get(URI.create(uri), conf); + Path inputPath = new Path(uri); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + CompressionCodec codec = factory.getCodec(inputPath); + if (codec == null) { + System.err.println("No codec found for " + uri); + System.exit(1); + } + CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + InputStream gzipInputStream = null; + try { + gzipInputStream = codec.createInputStream(fs.open(inputPath)); + int counter = 0; + try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) { + TarArchiveEntry entry = null; + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(outputPath), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class), + SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) { + while ((entry = tais.getNextTarEntry()) != null) { + String filename = entry.getName(); + if (entry.isDirectory() || !filename.contains("works")) { + } else { + counter++; + BufferedReader br = new BufferedReader(new InputStreamReader(tais)); + String line; + StringBuffer buffer = new StringBuffer(); + while 
((line = br.readLine()) != null) { + buffer.append(line); + } + String xml = buffer.toString(); + String[] filenameParts = filename.split("/"); + final Text key = new Text( + XMLRecordParser + .retrieveOrcidIdFromActivity( + xml.getBytes(), filenameParts[filenameParts.length - 1])); + final Text value = new Text(xml); + writer.append(key, value); + if ((counter % 100000) == 0) { + Log.info("Current xml works extracted: " + counter); + } + } + } + } + } + Log.info("Activities extraction completed"); + Log.info("Total XML works parsed: " + counter); + } finally { + Log.debug("Closing gzip stream"); + IOUtils.closeStream(gzipInputStream); + } + } + } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java new file mode 100644 index 000000000..c834efa20 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java @@ -0,0 +1,54 @@ + +package eu.dnetlib.doiboost.orcid; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.mortbay.log.Log; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; + +public class ExtractXMLActivitiesData extends OrcidDSManager { + private String outputWorksPath; + private String activitiesFileNameTarGz; + + public static void main(String[] args) throws IOException, Exception { + ExtractXMLActivitiesData extractXMLActivitiesData = new ExtractXMLActivitiesData(); + extractXMLActivitiesData.loadArgs(args); + extractXMLActivitiesData.extractWorks(); + } + + private void loadArgs(String[] args) throws IOException, Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + 
GenOrcidAuthorWork.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); + parser.parseArgument(args); + + hdfsServerUri = parser.get("hdfsServerUri"); + Log.info("HDFS URI: " + hdfsServerUri); + workingPath = parser.get("workingPath"); + Log.info("Working Path: " + workingPath); + activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz"); + Log.info("Activities File Name: " + activitiesFileNameTarGz); + outputWorksPath = parser.get("outputWorksPath"); + Log.info("Output Author Work Data: " + outputWorksPath); + } + + private void extractWorks() throws Exception { + Configuration conf = initConfigurationObject(); + FileSystem fs = initFileSystemObject(conf); + String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); + Path outputPath = new Path( + hdfsServerUri + .concat(workingPath) + .concat(outputWorksPath)); + ActivitiesDecompressor.extractXML(conf, tarGzUri, outputPath); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java new file mode 100644 index 000000000..843889108 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java @@ -0,0 +1,56 @@ + +package eu.dnetlib.doiboost.orcid; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.mortbay.log.Log; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; + +public class ExtractXMLSummariesData extends OrcidDSManager { + + private String outputAuthorsPath; + private String summariesFileNameTarGz; + + public static void main(String[] args) throws IOException, Exception { + ExtractXMLSummariesData 
extractXMLSummariesData = new ExtractXMLSummariesData(); + extractXMLSummariesData.loadArgs(args); + extractXMLSummariesData.extractAuthors(); + } + + private void loadArgs(String[] args) throws IOException, Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GenOrcidAuthorWork.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); + parser.parseArgument(args); + + hdfsServerUri = parser.get("hdfsServerUri"); + Log.info("HDFS URI: " + hdfsServerUri); + workingPath = parser.get("workingPath"); + Log.info("Working Path: " + workingPath); + summariesFileNameTarGz = parser.get("summariesFileNameTarGz"); + Log.info("Summaries File Name: " + summariesFileNameTarGz); + outputAuthorsPath = parser.get("outputAuthorsPath"); + Log.info("Output Authors Data: " + outputAuthorsPath); + } + + public void extractAuthors() throws Exception { + Configuration conf = initConfigurationObject(); + FileSystem fs = initFileSystemObject(conf); + String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); + Path outputPath = new Path( + hdfsServerUri + .concat(workingPath) + .concat(outputAuthorsPath) + .concat("xml_authors.seq")); + SummariesDecompressor.extractXML(conf, tarGzUri, outputPath); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index d1b2a1d73..c16899977 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -17,6 +17,7 @@ import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; +import 
org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; import eu.dnetlib.dhp.schema.orcid.AuthorData; @@ -160,4 +161,67 @@ public class SummariesDecompressor { Log.info("Error from Orcid found: " + errorFromOrcidFound); Log.info("Error parsing xml record found: " + xmlParserErrorFound); } + + public static void extractXML(Configuration conf, String inputUri, Path outputPath) + throws Exception { + String uri = inputUri; + FileSystem fs = FileSystem.get(URI.create(uri), conf); + Path inputPath = new Path(uri); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + CompressionCodec codec = factory.getCodec(inputPath); + if (codec == null) { + System.err.println("No codec found for " + uri); + System.exit(1); + } + CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + InputStream gzipInputStream = null; + try { + gzipInputStream = codec.createInputStream(fs.open(inputPath)); + int counter = 0; + try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) { + TarArchiveEntry entry = null; + CompressionCodec Codec = new GzipCodec(); + org.apache.hadoop.io.SequenceFile.Writer.Option optCom = SequenceFile.Writer + .compression(SequenceFile.CompressionType.RECORD, Codec); + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(outputPath), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class), optCom)) { + while ((entry = tais.getNextTarEntry()) != null) { + String filename = entry.getName(); + if (entry.isDirectory()) { + Log.debug("Directory entry name: " + entry.getName()); + } else { + Log.debug("XML record entry name: " + entry.getName()); + counter++; + BufferedReader br = new BufferedReader(new InputStreamReader(tais)); + String line; + StringBuffer buffer = new StringBuffer(); + while ((line = br.readLine()) != null) { + buffer.append(line); + } + String xml = buffer.toString(); + final Text key = new Text( + 
XMLRecordParser + .retrieveOrcidIdFromSummary( + xml.getBytes(), filename.split("/")[2].substring(0, 19))); + final Text value = new Text(xml); + writer.append(key, value); + } + if ((counter % 100000) == 0) { + Log.info("Current xml records extracted: " + counter); + } + } + } + } + Log.info("Summaries extract completed"); + Log.info("Total XML records parsed: " + counter); + + } finally { + Log.debug("Closing gzip stream"); + IOUtils.closeStream(gzipInputStream); + } + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index a807cf132..cc9abb621 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -4,6 +4,8 @@ package eu.dnetlib.doiboost.orcid.xml; import java.util.Arrays; import java.util.List; +import org.mortbay.log.Log; + import com.ximpleware.AutoPilot; import com.ximpleware.EOFException; import com.ximpleware.EncodingException; @@ -126,4 +128,33 @@ public class XMLRecordParser { } return workData; } + + public static String retrieveOrcidIdFromSummary(byte[] bytes, String defaultValue) + throws VtdException, ParseException { + return retrieveOrcidId(bytes, defaultValue, NS_RECORD, NS_RECORD_URL, "//record:record", "path").substring(1); + } + + public static String retrieveOrcidIdFromActivity(byte[] bytes, String defaultValue) + throws VtdException, ParseException { + return retrieveOrcidId(bytes, defaultValue, NS_WORK, NS_WORK_URL, "//work:work", "put-code"); + } + + private static String retrieveOrcidId(byte[] bytes, String defaultValue, String ns, String nsUrl, String xpath, + String idAttributeName) + throws VtdException, ParseException { + final VTDGen vg = new VTDGen(); + vg.setDoc(bytes); + vg.parse(true); + final VTDNav vn = vg.getNav(); + final 
AutoPilot ap = new AutoPilot(vn); + ap.declareXPathNameSpace(ns, nsUrl); + List recordNodes = VtdUtilityParser + .getTextValuesWithAttributes( + ap, vn, xpath, Arrays.asList(idAttributeName)); + if (!recordNodes.isEmpty()) { + return (recordNodes.get(0).getAttributes().get(idAttributeName)); + } + Log.info("id not found - default: " + defaultValue); + return defaultValue; + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/workflow.xml deleted file mode 100644 index 1f9adeb4d..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/workflow.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - - workingPathOrcid - the working dir base path - - - token - access token - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.orcid.OrcidDownloader - -d${workingPathOrcid}/ - -n${nameNode} - -flast_modified.csv - -odownload/ - -t${token} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml new file mode 100644 index 000000000..6f629c754 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_activities/oozie_app/workflow.xml @@ -0,0 +1,232 @@ + + + + workingPath + the working dir base path + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.java + ${oozieActionShareLibForSpark2} + + + oozie.launcher.mapreduce.user.classpath.first + true + + + oozie.launcher.mapreduce.map.java.opts + -Xmx2g + + + oozie.use.system.libpath + true + + + + + + + + + 
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_0.tar.gz + -owxml/works/xml_works_0.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_1.tar.gz + -owxml/works/xml_works_1.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_2.tar.gz + -owxml/works/xml_works_2.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_3.tar.gz + -owxml/works/xml_works_3.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_4.tar.gz + -owxml/works/xml_works_4.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_5.tar.gz + -owxml/works/xml_works_5.seq + -oew--- + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_6.tar.gz + -owxml/works/xml_works_6.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_7.tar.gz + -owxml/works/xml_works_7.seq + -oew--- + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_8.tar.gz + 
-owxml/works/xml_works_8.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_9.tar.gz + -owxml/works/xml_works_9.seq + -oew--- + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLActivitiesData + -w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_activites_X.tar.gz + -owxml/works/xml_works_X.seq + -oew--- + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml new file mode 100644 index 000000000..191654378 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/config-default.xml @@ -0,0 +1,26 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + oozie.launcher.mapreduce.map.java.opts + -Xmx8g + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml new file mode 100644 index 000000000..68d468ab3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_extract_xml_summaries/oozie_app/workflow.xml @@ -0,0 +1,40 @@ + + + + workingPath + the working dir base path + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.ExtractXMLSummariesData + 
-w${workingPath}/ + -n${nameNode} + -fORCID_2020_10_summaries.tar.gz + -oxml/authors/ + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_download/oozie_app/config-default.xml rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml new file mode 100644 index 000000000..a3daab116 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -0,0 +1,64 @@ + + + + workingPath + the working dir base path + + + token + access token + + + shell_cmd + wget -O /tmp/last_modified.csv.tar http://74804fb637bd8e2fba5b-e0a029c2f87486cddec3b416996a6057.r3.cf1.rackcdn.com/last_modified.csv.tar ; hdfs dfs -copyFromLocal /tmp/last_modified.csv.tar /data/orcid_activities_2020/last_modified.csv.tar ; rm -f /tmp/last_modified.csv.tar + + the shell command that downloads the lambda file from orcid containing last orcid update informations + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + bash + -c + ${shell_cmd} + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.orcid.OrcidDownloader + -d${workingPathOrcid}/ + -n${nameNode} + -flast_modified.csv + -odownload/ + -t${token} + + + + + + + \ No newline at end of file From 
57cac3689801929d651404fbe47c6a1cb7998f14 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 18 Nov 2020 13:38:03 +0100 Subject: [PATCH 031/108] changed the workflow name --- .../dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml index 4c286e4a2..b809b58fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + From 46ba3793f61084d59ed251a48b74aef1c157026c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 18 Nov 2020 16:47:31 +0100 Subject: [PATCH 032/108] code, workflow and parameters for the dump of the results associated to funders --- .../dump/funderresults/FunderResults.java | 19 ++ .../SparkPrepareResultProject.java | 165 ++++++++++++++++ .../graph/dump/funder_result_parameters.json | 26 +++ .../oozie_app/config-default.xml | 30 +++ .../dump/funderresults/oozie_app/workflow.xml | 179 ++++++++++++++++++ 5 files changed, 419 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml create mode 100644 
dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java new file mode 100644 index 000000000..18884a327 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java @@ -0,0 +1,19 @@ + +package eu.dnetlib.dhp.oa.graph.dump.funderresults; + +import java.io.Serializable; +import java.util.List; + +import eu.dnetlib.dhp.schema.dump.oaf.Result; + +public class FunderResults implements Serializable { + private List results; + + public List getResults() { + return results; + } + + public void setResults(List results) { + this.results = results; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java new file mode 100644 index 000000000..179c449f6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java @@ -0,0 +1,165 @@ + +package eu.dnetlib.dhp.oa.graph.dump.funderresults; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.*; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +/** + * Preparation of the Project information to be added to the dumped results. For each result associated to at least one + * Project, a serialization of an instance af ResultProject closs is done. ResultProject contains the resultId, and the + * list of Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to + */ +public class SparkPrepareResultProject implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkPrepareResultProject.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String communityMapPath = parser.get("communityMapPath"); + log.info("communityMapPath: {}", communityMapPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + prepareResultProjectList2(spark, inputPath, outputPath, communityMapPath); + }); + } + + private static void 
prepareResultProjectList(SparkSession spark, String inputPath, String outputPath, + String communityMapPath) { + + CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); + + Dataset relation = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); + + Dataset result = Utils + .readPath(spark, inputPath + "/publication", eu.dnetlib.dhp.schema.oaf.Result.class) + .union(Utils.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Result.class)) + .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", eu.dnetlib.dhp.schema.oaf.Result.class)) + .union(Utils.readPath(spark, inputPath + "/software", eu.dnetlib.dhp.schema.oaf.Result.class)); + + result + .joinWith(relation, result.col("id").equalTo(relation.col("target"))) + .groupByKey( + (MapFunction, String>) value -> value + ._2() + .getSource() + .substring(3, 15), + Encoders.STRING()) + .mapGroups( + (MapGroupsFunction, Tuple2>) ( + s, it) -> { + Tuple2 first = it.next(); + FunderResults fr = new FunderResults(); + List resultList = new ArrayList<>(); + resultList.add(ResultMapper.map(first._1(), communityMap, true)); + it.forEachRemaining(c -> { + resultList.add(ResultMapper.map(c._1(), communityMap, true)); + + }); + fr.setResults(resultList); + return new Tuple2<>(s, fr); + }, Encoders.tuple(Encoders.STRING(), Encoders.bean(FunderResults.class))) + .foreach(t -> { + String funder = t._1(); + spark + .createDataFrame(t._2.getResults(), Result.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + funder); + }); + + } + + private static void prepareResultProjectList2(SparkSession spark, String inputPath, String outputPath, + String communityMapPath) { + + CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); + + Dataset relation = Utils + .readPath(spark, inputPath + "/relation", Relation.class) + 
.filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); + + Dataset result = Utils + .readPath(spark, inputPath + "/publication", eu.dnetlib.dhp.schema.oaf.Result.class) + .union(Utils.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Result.class)) + .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", eu.dnetlib.dhp.schema.oaf.Result.class)) + .union(Utils.readPath(spark, inputPath + "/software", eu.dnetlib.dhp.schema.oaf.Result.class)); + + result + .joinWith(relation, result.col("id").equalTo(relation.col("target"))) + .groupByKey( + (MapFunction, String>) value -> value + ._2() + .getSource() + .substring(3, 15), + Encoders.STRING()) + .mapGroups( + (MapGroupsFunction, String>) (s, it) -> { + Tuple2 first = it.next(); + List resultList = new ArrayList<>(); + resultList.add(ResultMapper.map(first._1(), communityMap, true)); + it.forEachRemaining(c -> { + resultList.add(ResultMapper.map(c._1(), communityMap, true)); + + }); + spark + .createDataFrame(resultList, Result.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + s); + + return new String(); + }, Encoders.STRING()); + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json new file mode 100644 index 000000000..f5eb81983 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": 
"isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "cmp", + "paramLongName": "communityMapPath", + "paramDescription": "the community map path", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml new file mode 100644 index 000000000..5ddab3398 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml @@ -0,0 +1,179 @@ + + + + + sourcePath + the source path + + + isLookUpUrl + the isLookup service endpoint + + + outputPath + the output path + + + accessToken + the access token used for the deposition in Zenodo + + + connectionUrl + the connection url for Zenodo + + + metadata + the metadata associated to the deposition + + + depositionType + the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) + + + conceptRecordId + for new version, the id of the record for the old deposition + + + depositionId + the depositionId of a deposition open that has to be added content + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap + --outputPath${workingDir}/communityMap + --nameNode${nameNode} + --isLookUpUrl${isLookUpUrl} + + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkPrepareResultProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + 
--driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --outputPath${workingDir}/result + --communityMapPath${workingDir}/communityMap + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS + --hdfsPath${outputPath} + --nameNode${nameNode} + --accessToken${accessToken} + --connectionUrl${connectionUrl} + --metadata${metadata} + --communityMapPath${workingDir}/communityMap + --conceptRecordId${conceptRecordId} + --depositionType${depositionType} + --depositionId${depositionId} + + + + + + + + \ No newline at end of file From a172a37ad1c8259d95c1fb7acb83701ea057763a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 18 Nov 2020 16:55:07 +0100 Subject: [PATCH 033/108] fixed typo --- .../dhp/oa/graph/dump/community/SparkPrepareResultProject.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 39850b5b8..612af6d16 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -34,7 +34,7 @@ import scala.Tuple2; /** * Preparation of the Project information to be added to the dumped results. For each result associated to at least one - * Project, a serialization of an instance af ResultProject closs is done. 
ResultProject contains the resultId, and the + * Project, a serialization of an instance af ResultProject class is done. ResultProject contains the resultId, and the * list of Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to */ public class SparkPrepareResultProject implements Serializable { From 5402062ff52df8183016d248f70e4aa95f737dc3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 18 Nov 2020 16:58:20 +0100 Subject: [PATCH 034/108] changed parameter file with the ono associated to the job --- .../oa/graph/dump/funderresults/SparkPrepareResultProject.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java index 179c449f6..9b254088a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java @@ -38,7 +38,7 @@ public class SparkPrepareResultProject implements Serializable { .toString( SparkPrepareResultProject.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); From 906db690d2c4c1cfd69a05c5d5f5dc75db2229e9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 18 Nov 2020 17:43:08 +0100 Subject: [PATCH 035/108] - --- .../SparkPrepareResultProject.java | 47 +------------------ 1 file changed, 1 insertion(+), 46 deletions(-) diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java index 9b254088a..21b4f4dc7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java @@ -65,7 +65,7 @@ public class SparkPrepareResultProject implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - prepareResultProjectList2(spark, inputPath, outputPath, communityMapPath); + prepareResultProjectList(spark, inputPath, outputPath, communityMapPath); }); } @@ -115,51 +115,6 @@ public class SparkPrepareResultProject implements Serializable { .option("compression", "gzip") .json(outputPath + "/" + funder); }); - - } - - private static void prepareResultProjectList2(SparkSession spark, String inputPath, String outputPath, - String communityMapPath) { - - CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); - - Dataset relation = Utils - .readPath(spark, inputPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); - - Dataset result = Utils - .readPath(spark, inputPath + "/publication", eu.dnetlib.dhp.schema.oaf.Result.class) - .union(Utils.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Result.class)) - .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", eu.dnetlib.dhp.schema.oaf.Result.class)) - .union(Utils.readPath(spark, inputPath + "/software", eu.dnetlib.dhp.schema.oaf.Result.class)); - - result - .joinWith(relation, result.col("id").equalTo(relation.col("target"))) - .groupByKey( - (MapFunction, String>) value -> value - ._2() - .getSource() - .substring(3, 15), - 
Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, String>) (s, it) -> { - Tuple2 first = it.next(); - List resultList = new ArrayList<>(); - resultList.add(ResultMapper.map(first._1(), communityMap, true)); - it.forEachRemaining(c -> { - resultList.add(ResultMapper.map(c._1(), communityMap, true)); - - }); - spark - .createDataFrame(resultList, Result.class) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/" + s); - - return new String(); - }, Encoders.STRING()); - } } From fafb6888871367a18bebf3b9f1b1dff3dd369174 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 18 Nov 2020 18:56:48 +0100 Subject: [PATCH 036/108] - --- .../dump/funderresults/FunderResults.java | 13 +++-- ...oject.java => SparkDumpFunderResults.java} | 53 ++++++------------- .../dump/funderresults/oozie_app/workflow.xml | 2 +- 3 files changed, 23 insertions(+), 45 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/{SparkPrepareResultProject.java => SparkDumpFunderResults.java} (66%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java index 18884a327..c3d2d4c2c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java @@ -2,18 +2,17 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import java.io.Serializable; -import java.util.List; import eu.dnetlib.dhp.schema.dump.oaf.Result; -public class FunderResults implements Serializable { - private List results; +public class FunderResults extends Result implements Serializable { + private String funder_id; - public List getResults() { - return results; + public String getFunder_id() { + 
return funder_id; } - public void setResults(List results) { - this.results = results; + public void setFunder_id(String funder_id) { + this.funder_id = funder_id; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java similarity index 66% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 21b4f4dc7..f84a5fd11 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -9,7 +9,6 @@ import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -21,7 +20,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.schema.dump.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -30,13 +28,13 @@ import scala.Tuple2; * Project, a serialization of an instance af ResultProject closs is done. 
ResultProject contains the resultId, and the * list of Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to */ -public class SparkPrepareResultProject implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class); +public class SparkDumpFunderResults implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class); public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - SparkPrepareResultProject.class + SparkDumpFunderResults.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json")); @@ -65,12 +63,12 @@ public class SparkPrepareResultProject implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - prepareResultProjectList(spark, inputPath, outputPath, communityMapPath); + writeResultProjectList(spark, inputPath, outputPath, communityMapPath); }); } - private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath, - String communityMapPath) { + private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, + String communityMapPath) { CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); @@ -85,36 +83,17 @@ public class SparkPrepareResultProject implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", eu.dnetlib.dhp.schema.oaf.Result.class)); result - .joinWith(relation, result.col("id").equalTo(relation.col("target"))) - .groupByKey( - (MapFunction, String>) value -> value - ._2() - .getSource() - .substring(3, 15), - Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, Tuple2>) ( - s, it) -> { - Tuple2 first = it.next(); - FunderResults fr = new FunderResults(); - List resultList = new ArrayList<>(); - resultList.add(ResultMapper.map(first._1(), communityMap, true)); - 
it.forEachRemaining(c -> { - resultList.add(ResultMapper.map(c._1(), communityMap, true)); + .joinWith(relation, result.col("id").equalTo(relation.col("target")), "inner") + .map((MapFunction, FunderResults>) value ->{ + FunderResults res = (FunderResults) ResultMapper.map(value._1(), communityMap, false); + res.setFunder_id(value._2().getSource().substring(3,15)); + return res; + }, Encoders.bean(FunderResults.class)) + .write() + .partitionBy("funder_id") + .mode(SaveMode.Overwrite) + .json(outputPath); - }); - fr.setResults(resultList); - return new Tuple2<>(s, fr); - }, Encoders.tuple(Encoders.STRING(), Encoders.bean(FunderResults.class))) - .foreach(t -> { - String funder = t._1(); - spark - .createDataFrame(t._2.getResults(), Result.class) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/" + funder); - }); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml index 5ddab3398..aca8f9714 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml @@ -137,7 +137,7 @@ yarn cluster Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkPrepareResultProject + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} From 24c56fa7a3584c4a258e9674e089ce36f23b7125 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 19 Nov 2020 19:15:39 +0100 Subject: [PATCH 037/108] new logic and workflow for dump of results with link to projects. In this implementation the result match the model of the communityresult. 
--- .../dump/funderresults/FunderResults.java | 4 +- .../funderresults/SparkDumpFunderResults.java | 41 +- .../SparkResultLinkedToProject.java | 82 ++++ .../dump/funderresults/oozie_app/workflow.xml | 365 +++++++++++++++++- .../graph/dump/input_parameters_link_prj.json | 36 ++ 5 files changed, 501 insertions(+), 27 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java index c3d2d4c2c..138e262e4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import java.io.Serializable; -import eu.dnetlib.dhp.schema.dump.oaf.Result; +import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; -public class FunderResults extends Result implements Serializable { +public class FunderResults extends CommunityResult implements Serializable { private String funder_id; public String getFunder_id() { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index f84a5fd11..d4e294735 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -17,9 +17,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.api.zenodo.Community; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -53,8 +55,8 @@ public class SparkDumpFunderResults implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {}", communityMapPath); + final String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); SparkConf conf = new SparkConf(); @@ -63,36 +65,33 @@ public class SparkDumpFunderResults implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - writeResultProjectList(spark, inputPath, outputPath, communityMapPath); + writeResultProjectList(spark, inputPath, outputPath); }); } - private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, - String communityMapPath) { - - CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); + private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) { Dataset relation = Utils .readPath(spark, inputPath + "/relation", Relation.class) .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); - Dataset result = Utils - .readPath(spark, inputPath + "/publication", eu.dnetlib.dhp.schema.oaf.Result.class) - .union(Utils.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Result.class)) - 
.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", eu.dnetlib.dhp.schema.oaf.Result.class)) - .union(Utils.readPath(spark, inputPath + "/software", eu.dnetlib.dhp.schema.oaf.Result.class)); + Dataset result = Utils + .readPath(spark, inputPath + "/publication", CommunityResult.class) + .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); result .joinWith(relation, result.col("id").equalTo(relation.col("target")), "inner") - .map((MapFunction, FunderResults>) value ->{ - FunderResults res = (FunderResults) ResultMapper.map(value._1(), communityMap, false); - res.setFunder_id(value._2().getSource().substring(3,15)); - return res; - }, Encoders.bean(FunderResults.class)) - .write() - .partitionBy("funder_id") - .mode(SaveMode.Overwrite) - .json(outputPath); + .map((MapFunction, FunderResults>) value -> { + FunderResults res = (FunderResults) value._1(); + res.setFunder_id(value._2().getSource().substring(3, 15)); + return res; + }, Encoders.bean(FunderResults.class)) + .write() + .partitionBy("funder_id") + .mode(SaveMode.Overwrite) + .json(outputPath); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java new file mode 100644 index 000000000..456e44447 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -0,0 +1,82 @@ + +package eu.dnetlib.dhp.oa.graph.dump.funderresults; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import 
org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; + +public class SparkResultLinkedToProject implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkResultLinkedToProject.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkResultLinkedToProject.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String resultClassName = parser.get("resultTableName"); + log.info("resultTableName: {}", resultClassName); + + final String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); + + Class inputClazz = (Class) Class.forName(resultClassName); + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + writeResultsLikedToProjects(spark, inputClazz, inputPath, outputPath, relationPath); + }); + } + + private static void writeResultsLikedToProjects(SparkSession spark, Class inputClazz, + String inputPath, String outputPath, 
String relationPath) { + + Dataset results = Utils.readPath(spark, inputPath, inputClazz); + Dataset relations = Utils + .readPath(spark, relationPath, Relation.class) + .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); + relations + .joinWith( + results, relations.col("target").equalTo(results.col("id")), + "inner") + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml index aca8f9714..d0acdf051 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml @@ -127,10 +127,367 @@ --nameNode${nameNode} --isLookUpUrl${isLookUpUrl} - + + + + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/result/publication + --relationPath${sourcePath}/relation + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + 
dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/result/dataset + --relationPath${sourcePath}/relation + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/result/otherresearchproduct + --relationPath${sourcePath}/relation + + + + + + + + yarn + cluster + Dump funder results + eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf 
spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/result/software + --relationPath${sourcePath}/relation + + + + + + + + + + + + + + + + + yarn + cluster + Dump table publication for community related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/result/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --outputPath${workingDir}/dump/publication + --communityMapPath${workingDir}/communityMap + + + + + + + + yarn + cluster + Dump table dataset for community related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/result/dataset + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset + --outputPath${workingDir}/dump/dataset + --communityMapPath${workingDir}/communityMap + + + + + + + + yarn 
+ cluster + Dump table ORP for community related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/result/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct + --outputPath${workingDir}/dump/otherresearchproduct + --communityMapPath${workingDir}/communityMap + + + + + + + + yarn + cluster + Dump table software for community related products + eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/result/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software + --outputPath${workingDir}/dump/software + --communityMapPath${workingDir}/communityMap + + + + + + + + + + yarn + cluster + Prepare association result subset of project info + eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + 
--conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --outputPath${workingDir}/preparedInfo + + + + + + + + + + + + + + + yarn + cluster + Extend dumped publications with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/publication + --outputPath${workingDir}/ext/publication + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + yarn + cluster + Extend dumped dataset with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/dataset + --outputPath${workingDir}/ext/dataset + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + yarn + cluster + 
Extend dumped ORP with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/otherresearchproduct + --outputPath${workingDir}/ext/orp + --preparedInfoPath${workingDir}/preparedInfo + + + + + + + + yarn + cluster + Extend dumped software with information about project + eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/dump/software + --outputPath${workingDir}/ext/software + --preparedInfoPath${workingDir}/preparedInfo + + + + + @@ -149,9 +506,9 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${sourcePath} - --outputPath${workingDir}/result - --communityMapPath${workingDir}/communityMap + --sourcePath${workingDir}/ext + --outputPath${outputPath} + --relationPath${sourcePath}/relation diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json new file mode 100644 index 000000000..b1f4c026a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json @@ -0,0 +1,36 @@ +[ + + { + "paramName":"cmp", + "paramLongName":"communityMapPath", + "paramDescription": "the path to the serialization of the community map", + "paramRequired": true + }, + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } +] + + + From cf3f47563fbca758ba69e6a50a9096fc252c00f4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 19 Nov 2020 19:16:05 +0100 Subject: [PATCH 038/108] new parameter files --- .../dhp/oa/graph/dump/funder_result_parameters.json | 7 ++++++- .../dhp/oa/graph/dump/input_parameters_link_prj.json | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json index f5eb81983..2128c91e4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json @@ -22,5 +22,10 @@ "paramLongName": "communityMapPath", "paramDescription": "the community map path", "paramRequired": true - } + },{ + "paramName": "rp", + "paramLongName": "relationPath", + "paramDescription": "the relationPath", + "paramRequired": true +} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json index b1f4c026a..aab929e43 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json @@ -29,7 +29,12 @@ "paramLongName":"resultTableName", "paramDescription": "the name of the result table we are currently working on", "paramRequired": true - } + }, { + "paramName":"rp", + "paramLongName":"relationPath", + "paramDescription": "the path to the relations", + "paramRequired": true +} ] From 0a9db67eecb01753a9b144bf3dd98ff4ce17f06e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Nov 2020 12:21:33 +0100 Subject: [PATCH 039/108] - --- .../dump/funderresults/SparkResultLinkedToProject.java | 5 +++++ .../dhp/oa/graph/dump/funder_result_parameters.json | 7 +------ .../dhp/oa/graph/dump/input_parameters_link_prj.json | 7 ------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 456e44447..6d059567c 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -8,7 +8,10 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; @@ -18,6 +21,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import scala.Tuple2; public class SparkResultLinkedToProject implements Serializable { @@ -74,6 +78,7 @@ public class SparkResultLinkedToProject implements Serializable { .joinWith( results, relations.col("target").equalTo(results.col("id")), "inner") + .map((MapFunction, R>) t2 -> t2._2(), Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json index 2128c91e4..9a5a553b1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json @@ -17,12 +17,7 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName": "cmp", - "paramLongName": "communityMapPath", - "paramDescription": "the community map path", - 
"paramRequired": true - },{ +{ "paramName": "rp", "paramLongName": "relationPath", "paramDescription": "the relationPath", diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json index aab929e43..5c4886741 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json @@ -1,11 +1,4 @@ [ - - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": true - }, { "paramName":"s", "paramLongName":"sourcePath", From 259c67ce365b58aa052ae2df85a20a4e526ccb2c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Nov 2020 12:32:23 +0100 Subject: [PATCH 040/108] fixed issue in path name --- .../oa/graph/dump/funderresults/SparkDumpFunderResults.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index d4e294735..acafa968a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -65,14 +65,14 @@ public class SparkDumpFunderResults implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - writeResultProjectList(spark, inputPath, outputPath); + writeResultProjectList(spark, inputPath, outputPath, relationPath); }); } - private static 
void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) { + private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, String relationPath) { Dataset relation = Utils - .readPath(spark, inputPath + "/relation", Relation.class) + .readPath(spark, relationPath + "/relation", Relation.class) .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); Dataset result = Utils From 97c8111847a148fb738c593136d16934c6be15cf Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 23 Nov 2020 09:49:22 +0100 Subject: [PATCH 041/108] action to convert lambda file in seq file; spark action to download updated authors --- .../doiboost/orcid/OrcidDownloader.java | 185 +++++++++--------- .../orcid/SparkDownloadOrcidAuthors.java | 166 ++++++++++++++++ .../orcid/SparkGenLastModifiedSeq.java | 99 ++++++++++ .../orcid/SparkOrcidGenerateAuthors.java | 165 ---------------- .../orcid/SparkPartitionLambdaFile.java | 50 ----- .../orcid/model/DownloadedRecordData.java | 14 +- .../gen_orcid_authors_parameters.json | 4 - .../oozie_app/config-default.xml | 22 --- .../orcid_gen_authors/oozie_app/workflow.xml | 83 -------- .../oozie_app/workflow.xml | 122 +++++++++++- .../doiboost/orcid/OrcidClientTest.java | 139 +++++++++++-- .../0000-0001-6645-509X.compressed.base64 | 1 - .../0000-0003-3028-6161.compressed.base64 | 1 + 13 files changed, 608 insertions(+), 443 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkOrcidGenerateAuthors.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPartitionLambdaFile.java delete mode 100644 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-6645-509X.compressed.base64 create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java index 762d8aecd..be727ab9f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDownloader.java @@ -1,14 +1,15 @@ package eu.dnetlib.doiboost.orcid; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; +import java.io.*; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; import java.util.List; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -16,6 +17,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import 
org.apache.http.impl.client.CloseableHttpClient; @@ -27,10 +29,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class OrcidDownloader extends OrcidDSManager { static final int REQ_LIMIT = 24; -// static final int REQ_MAX_TEST = 100; - static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 10000; + static final int REQ_MAX_TEST = -1; + static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 500; static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; - static final String lastUpdate = "2019-09-30 00:00:00"; + static final String lastUpdate = "2020-09-29 00:00:00"; private String lambdaFileName; private String outputPath; private String token; @@ -41,7 +43,7 @@ public class OrcidDownloader extends OrcidDSManager { orcidDownloader.parseLambdaFile(); } - private String downloadRecord(String orcidId) { + private String downloadRecord(String orcidId) throws IOException { try (CloseableHttpClient client = HttpClients.createDefault()) { HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); httpGet.addHeader("Accept", "application/vnd.orcid+xml"); @@ -49,17 +51,23 @@ public class OrcidDownloader extends OrcidDSManager { CloseableHttpResponse response = client.execute(httpGet); if (response.getStatusLine().getStatusCode() != 200) { Log - .warn( + .info( "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); return new String(""); } - return IOUtils.toString(response.getEntity().getContent()); - - } catch (Throwable e) { - Log.warn("Downloading " + orcidId, e.getMessage()); - +// return IOUtils.toString(response.getEntity().getContent()); + return xmlStreamToString(response.getEntity().getContent()); } - return new String(""); + } + + private String xmlStreamToString(InputStream xmlStream) throws IOException { + BufferedReader br = new BufferedReader(new InputStreamReader(xmlStream)); + String line; + StringBuffer buffer = new StringBuffer(); + while ((line = br.readLine()) != null) { + 
buffer.append(line); + } + return buffer.toString(); } public void parseLambdaFile() throws Exception { @@ -76,90 +84,87 @@ public class OrcidDownloader extends OrcidDSManager { hdfsServerUri .concat(workingPath) .concat(outputPath) - .concat("orcid_records.seq")); - - try (SequenceFile.Writer writer = SequenceFile - .createWriter( - conf, - SequenceFile.Writer.file(hdfsoutputPath), - SequenceFile.Writer.keyClass(Text.class), - SequenceFile.Writer.valueClass(Text.class))) { - - try (BufferedReader br = new BufferedReader(new InputStreamReader(lambdaFileStream))) { - String line; - int nReqTmp = 0; + .concat("updated_xml_authors.seq")); + try (TarArchiveInputStream tais = new TarArchiveInputStream( + new GzipCompressorInputStream(lambdaFileStream))) { + TarArchiveEntry entry = null; + StringBuilder sb = new StringBuilder(); + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(hdfsoutputPath), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class), + SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) { startDownload = System.currentTimeMillis(); - long startReqTmp = System.currentTimeMillis(); - while ((line = br.readLine()) != null) { - parsedRecordsCounter++; - // skip headers line - if (parsedRecordsCounter == 1) { - continue; - } - String[] values = line.split(","); - List recordInfo = Arrays.asList(values); - String orcidId = recordInfo.get(0); - if (isModified(orcidId, recordInfo.get(3))) { - String record = downloadRecord(orcidId); - downloadedRecordsCounter++; - if (!record.isEmpty()) { - String compressRecord = ArgumentApplicationParser.compressArgument(record); - final Text key = new Text(recordInfo.get(0)); - final Text value = new Text(compressRecord); - - try { + while ((entry = tais.getNextTarEntry()) != null) { + BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from tarInput + String line; + while 
((line = br.readLine()) != null) { + String[] values = line.split(","); + List recordInfo = Arrays.asList(values); + int nReqTmp = 0; + long startReqTmp = System.currentTimeMillis(); + // skip headers line + if (parsedRecordsCounter == 0) { + parsedRecordsCounter++; + continue; + } + parsedRecordsCounter++; + String orcidId = recordInfo.get(0); + if (isModified(orcidId, recordInfo.get(3))) { + String record = downloadRecord(orcidId); + downloadedRecordsCounter++; + if (!record.isEmpty()) { +// String compressRecord = ArgumentApplicationParser.compressArgument(record); + final Text key = new Text(recordInfo.get(0)); + final Text value = new Text(record); writer.append(key, value); savedRecordsCounter++; - } catch (IOException e) { - Log.warn("Writing to sequence file: " + e.getMessage()); - Log.warn(e); - throw new RuntimeException(e); + } + } else { + break; + } + long endReq = System.currentTimeMillis(); + nReqTmp++; + if (nReqTmp == REQ_LIMIT) { + long reqSessionDuration = endReq - startReqTmp; + if (reqSessionDuration <= 1000) { + Log + .info( + "\nreqSessionDuration: " + + reqSessionDuration + + " nReqTmp: " + + nReqTmp + + " wait ...."); + Thread.sleep(1000 - reqSessionDuration); + } else { + nReqTmp = 0; + startReqTmp = System.currentTimeMillis(); + } + } + if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) { + Log + .info( + "Current parsed: " + + parsedRecordsCounter + + " downloaded: " + + downloadedRecordsCounter + + " saved: " + + savedRecordsCounter); + if (REQ_MAX_TEST != -1 && parsedRecordsCounter > REQ_MAX_TEST) { + break; } } } - long endReq = System.currentTimeMillis(); - nReqTmp++; - if (nReqTmp == REQ_LIMIT) { - long reqSessionDuration = endReq - startReqTmp; - if (reqSessionDuration <= 1000) { - Log - .warn( - "\nreqSessionDuration: " - + reqSessionDuration - + " nReqTmp: " - + nReqTmp - + " wait ...."); - Thread.sleep(1000 - reqSessionDuration); - } else { - nReqTmp = 0; - startReqTmp = System.currentTimeMillis(); - } - } - 
-// if (parsedRecordsCounter > REQ_MAX_TEST) { -// break; -// } - if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) { - Log - .info( - "Current parsed: " - + parsedRecordsCounter - + " downloaded: " - + downloadedRecordsCounter - + " saved: " - + savedRecordsCounter); -// if (parsedRecordsCounter > REQ_MAX_TEST) { -// break; -// } - } + long endDownload = System.currentTimeMillis(); + long downloadTime = endDownload - startDownload; + Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes"); } - long endDownload = System.currentTimeMillis(); - long downloadTime = endDownload - startDownload; - Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes"); } } - lambdaFileStream.close(); Log.info("Download started at: " + new Date(startDownload).toString()); + Log.info("Download ended at: " + new Date(System.currentTimeMillis()).toString()); Log.info("Parsed Records Counter: " + parsedRecordsCounter); Log.info("Downloaded Records Counter: " + downloadedRecordsCounter); Log.info("Saved Records Counter: " + savedRecordsCounter); @@ -185,7 +190,7 @@ public class OrcidDownloader extends OrcidDSManager { token = parser.get("token"); } - private boolean isModified(String orcidId, String modifiedDate) { + public boolean isModified(String orcidId, String modifiedDate) { Date modifiedDateDt = null; Date lastUpdateDt = null; try { @@ -195,7 +200,7 @@ public class OrcidDownloader extends OrcidDSManager { modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate); lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate); } catch (Exception e) { - Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage()); + Log.info("[" + orcidId + "] Parsing date: ", e.getMessage()); return true; } return modifiedDateDt.after(lastUpdateDt); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java new file mode 100644 index 000000000..850a654d4 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -0,0 +1,166 @@ + +package eu.dnetlib.doiboost.orcid; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.util.LongAccumulator; +import org.mortbay.log.Log; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import scala.Tuple2; + +public class SparkDownloadOrcidAuthors { + + static Logger logger = LoggerFactory.getLogger(SparkDownloadOrcidAuthors.class); + static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; + static final String lastUpdate = "2020-09-29 00:00:00"; + + public static void main(String[] args) throws IOException, Exception { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkDownloadOrcidAuthors.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); + parser.parseArgument(args); + Boolean 
isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final String workingPath = parser.get("workingPath"); + logger.info("workingPath: ", workingPath); + final String outputPath = parser.get("outputPath"); + logger.info("outputPath: ", outputPath); + final String token = parser.get("token"); + final String lambdaFileName = parser.get("lambdaFileName"); + logger.info("lambdaFileName: ", lambdaFileName); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsedRecords"); + LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("modifiedRecords"); + LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloadedRecords"); + + logger.info("Retrieving data from lamda sequence file"); + JavaPairRDD lamdaFileRDD = sc + .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); + logger.info("Data retrieved: " + lamdaFileRDD.count()); + + Function, Boolean> isModifiedAfterFilter = data -> { + String orcidId = data._1().toString(); + String lastModifiedDate = data._2().toString(); + parsedRecordsAcc.add(1); + if (isModified(orcidId, lastModifiedDate)) { + modifiedRecordsAcc.add(1); + return true; + } + return false; + }; + + Function, Tuple2> downloadRecordFunction = data -> { + String orcidId = data._1().toString(); + String lastModifiedDate = data._2().toString(); + final DownloadedRecordData downloaded = new DownloadedRecordData(); + downloaded.setOrcidId(orcidId); + downloaded.setLastModifiedDate(lastModifiedDate); + try (CloseableHttpClient client = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + 
"/record"); + httpGet.addHeader("Accept", "application/vnd.orcid+xml"); + httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + CloseableHttpResponse response = client.execute(httpGet); + int statusCode = response.getStatusLine().getStatusCode(); + downloaded.setStatusCode(statusCode); + if (statusCode != 200) { + logger + .info( + "Downloading " + orcidId + " status code: " + + response.getStatusLine().getStatusCode()); + return downloaded.toTuple2(); + } + downloadedRecordsAcc.add(1); + long currentDownloaded = downloadedRecordsAcc.value(); + if ((currentDownloaded % 10000) == 0) { + logger.info("Current downloaded: " + currentDownloaded); + } + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(IOUtils.toString(response.getEntity().getContent()))); + } catch (Throwable e) { + logger.info("Downloading " + orcidId, e.getMessage()); + downloaded.setErrorMessage(e.getMessage()); + return downloaded.toTuple2(); + } + return downloaded.toTuple2(); + }; + + sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); + + logger.info("Start execution ..."); +// List> sampleList = lamdaFileRDD.take(500); +// JavaRDD> sampleRDD = sc.parallelize(sampleList); +// sampleRDD + JavaPairRDD authorsModifiedRDD = lamdaFileRDD + .filter(isModifiedAfterFilter); + logger.info("Authors modified count: " + authorsModifiedRDD.count()); + logger.info("Start downloading ..."); + authorsModifiedRDD + .map(downloadRecordFunction) + .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .saveAsNewAPIHadoopFile( + workingPath.concat(outputPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + sc.hadoopConfiguration()); + logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); + logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); + logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); + }); + + } + + private static boolean 
isModified(String orcidId, String modifiedDate) { + Date modifiedDateDt = null; + Date lastUpdateDt = null; + try { + if (modifiedDate.length() != 19) { + modifiedDate = modifiedDate.substring(0, 19); + } + modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate); + lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate); + } catch (Exception e) { + logger.info("[" + orcidId + "] Parsing date: ", e.getMessage()); + return true; + } + return modifiedDateDt.after(lastUpdateDt); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java new file mode 100644 index 000000000..f710635ab --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ -0,0 +1,99 @@ + +package eu.dnetlib.doiboost.orcid; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URI; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.SparkConf; +import org.mortbay.log.Log; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class SparkGenLastModifiedSeq { + private static String hdfsServerUri; + private 
static String workingPath; + private static String outputPath; + private static String lambdaFileName; + + public static void main(String[] args) throws IOException, Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkGenLastModifiedSeq.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_orcid_data.json"))); + parser.parseArgument(args); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + hdfsServerUri = parser.get("hdfsServerUri"); + workingPath = parser.get("workingPath"); + outputPath = parser.get("outputPath"); + lambdaFileName = parser.get("lambdaFileName"); + String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + + SparkConf sparkConf = new SparkConf(); + runWithSparkSession( + sparkConf, + isSparkSessionManaged, + spark -> { + int rowsNum = 0; + Path output = new Path( + hdfsServerUri + .concat(workingPath) + .concat(outputPath)); + Path hdfsreadpath = new Path(lambdaFileUri); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); + FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath); + try (TarArchiveInputStream tais = new TarArchiveInputStream( + new GzipCompressorInputStream(lambdaFileStream))) { + TarArchiveEntry entry = null; + try (SequenceFile.Writer writer = SequenceFile + .createWriter( + conf, + SequenceFile.Writer.file(output), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class), + SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()))) { + while ((entry = 
tais.getNextTarEntry()) != null) { + BufferedReader br = new BufferedReader(new InputStreamReader(tais)); + String line; + while ((line = br.readLine()) != null) { + String[] values = line.split(","); + List recordInfo = Arrays.asList(values); + String orcidId = recordInfo.get(0); + final Text key = new Text(orcidId); + final Text value = new Text(recordInfo.get(3)); + writer.append(key, value); + rowsNum++; + } + } + } + } + Log.info("Saved rows from lamda csv tar file: " + rowsNum); + }); + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkOrcidGenerateAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkOrcidGenerateAuthors.java deleted file mode 100644 index 4e18ab840..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkOrcidGenerateAuthors.java +++ /dev/null @@ -1,165 +0,0 @@ - -package eu.dnetlib.doiboost.orcid; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.List; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.Text; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.Function; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.util.LongAccumulator; -import org.mortbay.log.Log; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; -import scala.Tuple2; 
- -public class SparkOrcidGenerateAuthors { - - static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; - static final String lastUpdate = "2019-09-30 00:00:00"; - - public static void main(String[] args) throws IOException, Exception { - Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class); - logger.info("[ SparkOrcidGenerateAuthors STARTED]"); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkOrcidGenerateAuthors.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json"))); - parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); - final String outputAuthorsPath = parser.get("outputAuthorsPath"); - logger.info("outputAuthorsPath: ", outputAuthorsPath); - final String token = parser.get("token"); - - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - LongAccumulator parsedRecordsAcc = sc.sc().longAccumulator("parsedRecords"); - LongAccumulator modifiedRecordsAcc = sc.sc().longAccumulator("modifiedRecords"); - LongAccumulator downloadedRecordsAcc = sc.sc().longAccumulator("downloadedRecords"); - LongAccumulator alreadyDownloadedRecords = sc.sc().longAccumulator("alreadyDownloadedRecords"); - JavaRDD lamdaFileRDD = sc.textFile(workingPath + "lamdafiles"); - - JavaRDD downloadedRDD = sc.textFile(workingPath + "downloaded"); - Function getOrcidIdFunction = line -> { - try { - String[] values = line.split(","); - return values[0].substring(1); - } catch (Exception e) { - return new String(""); - } - }; - List downloadedRecords = 
downloadedRDD.map(getOrcidIdFunction).collect(); - - Function isModifiedAfterFilter = line -> { - String[] values = line.split(","); - String orcidId = values[0]; - parsedRecordsAcc.add(1); - if (isModified(orcidId, values[3])) { - modifiedRecordsAcc.add(1); - return true; - } - return false; - }; - Function isNotDownloadedFilter = line -> { - String[] values = line.split(","); - String orcidId = values[0]; - if (downloadedRecords.contains(orcidId)) { - alreadyDownloadedRecords.add(1); - return false; - } - return true; - }; - Function> downloadRecordFunction = line -> { - String[] values = line.split(","); - String orcidId = values[0]; - String modifiedDate = values[3]; - return downloadRecord(orcidId, modifiedDate, token, downloadedRecordsAcc); - }; - - lamdaFileRDD - .filter(isModifiedAfterFilter) - .filter(isNotDownloadedFilter) - .map(downloadRecordFunction) - .rdd() - .saveAsTextFile(workingPath.concat(outputAuthorsPath)); - }); - - } - - private static boolean isModified(String orcidId, String modifiedDate) { - Date modifiedDateDt = null; - Date lastUpdateDt = null; - try { - if (modifiedDate.length() != 19) { - modifiedDate = modifiedDate.substring(0, 19); - } - modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate); - lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate); - } catch (Exception e) { - Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage()); - return true; - } - return modifiedDateDt.after(lastUpdateDt); - } - - private static Tuple2 downloadRecord(String orcidId, String modifiedDate, String token, - LongAccumulator downloadedRecordsAcc) { - final DownloadedRecordData data = new DownloadedRecordData(); - data.setOrcidId(orcidId); - data.setModifiedDate(modifiedDate); - try (CloseableHttpClient client = HttpClients.createDefault()) { - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - 
httpGet.addHeader("Authorization", String.format("Bearer %s", token)); - CloseableHttpResponse response = client.execute(httpGet); - int statusCode = response.getStatusLine().getStatusCode(); - data.setStatusCode(statusCode); - if (statusCode != 200) { - Log - .warn( - "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); - return data.toTuple2(); - } - downloadedRecordsAcc.add(1); - data - .setCompressedData( - ArgumentApplicationParser.compressArgument(IOUtils.toString(response.getEntity().getContent()))); - } catch (Throwable e) { - Log.warn("Downloading " + orcidId, e.getMessage()); - data.setErrorMessage(e.getMessage()); - return data.toTuple2(); - } - return data.toTuple2(); - } -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPartitionLambdaFile.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPartitionLambdaFile.java deleted file mode 100644 index ca6f0f6c4..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPartitionLambdaFile.java +++ /dev/null @@ -1,50 +0,0 @@ - -package eu.dnetlib.doiboost.orcid; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.IOException; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class SparkPartitionLambdaFile { - - public static void main(String[] args) throws IOException, Exception { - Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkOrcidGenerateAuthors.class - .getResourceAsStream( - 
"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json"))); - parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - final String workingPath = parser.get("workingPath"); - - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD lamdaFileRDD = sc.textFile(workingPath + "last_modified.csv"); - - lamdaFileRDD - .repartition(20) - .saveAsTextFile(workingPath.concat("lamdafiles")); - }); - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/DownloadedRecordData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/DownloadedRecordData.java index f66ef82a2..da1a79b19 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/DownloadedRecordData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/model/DownloadedRecordData.java @@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid.model; import java.io.Serializable; -import org.apache.hadoop.io.Text; - import com.google.gson.JsonObject; import scala.Tuple2; @@ -12,7 +10,7 @@ import scala.Tuple2; public class DownloadedRecordData implements Serializable { private String orcidId; - private String modifiedDate; + private String lastModifiedDate; private String statusCode; private String compressedData; private String errorMessage; @@ -20,7 +18,7 @@ public class DownloadedRecordData implements Serializable { public Tuple2 toTuple2() { JsonObject data = new JsonObject(); data.addProperty("statusCode", getStatusCode()); - data.addProperty("modifiedDate", getModifiedDate()); + data.addProperty("lastModifiedDate", getLastModifiedDate()); if (getCompressedData() != null) { data.addProperty("compressedData", getCompressedData()); } @@ -66,11 +64,11 @@ 
public class DownloadedRecordData implements Serializable { this.compressedData = compressedData; } - public String getModifiedDate() { - return modifiedDate; + public String getLastModifiedDate() { + return lastModifiedDate; } - public void setModifiedDate(String modifiedDate) { - this.modifiedDate = modifiedDate; + public void setLastModifiedDate(String lastModifiedDate) { + this.lastModifiedDate = lastModifiedDate; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json deleted file mode 100644 index 35bfe1b41..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json +++ /dev/null @@ -1,4 +0,0 @@ -[{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path", "paramRequired": true}, - {"paramName":"t", "paramLongName":"token", "paramDescription": "token to grant access", "paramRequired": true}, - {"paramName":"o", "paramLongName":"outputAuthorsPath", "paramDescription": "the relative folder of the sequencial file to write the authors data", "paramRequired": true} -] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml deleted file mode 100644 index a720e7592..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/config-default.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - jobTracker - hadoop-rm3.garr-pa1.d4science.org:8032 - - - nameNode - hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 - - - queueName - default - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - \ No newline at end of file diff 
--git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml deleted file mode 100644 index 7ebc5f0a0..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_gen_authors/oozie_app/workflow.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - workingPath - the working dir base path - - - token - access token - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - outputPath - the working dir base path - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - yarn - cluster - Split_Lambda_File - eu.dnetlib.doiboost.orcid.SparkPartitionLambdaFile - dhp-doiboost-1.2.1-SNAPSHOT.jar - --num-executors 24 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - - -w${workingPath}/ - -oauthors/ - -t${token} - - - - - - - - ${jobTracker} - ${nameNode} - yarn - cluster - Gen_Orcid_Authors - eu.dnetlib.doiboost.orcid.SparkOrcidGenerateAuthors - dhp-doiboost-1.2.1-SNAPSHOT.jar - --num-executors 20 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - - -w${workingPath}/ - -oauthors/ - -t${token} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index a3daab116..5f728d35b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -14,9 +14,63 @@ the shell command that downloads the lambda file from orcid containing last orcid update informations + + sparkExecutorNumber + 20 + + + sparkDriverMemory + 7G + memory for driver process + + + sparkExecutorMemory + 2G + memory for individual executor + + + sparkExecutorCores + 1 + number of cores used by single executor + + + spark2MaxExecutors + 20 + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + - - + + + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + @@ -26,6 +80,7 @@ + @@ -41,24 +96,77 @@ ${shell_cmd} - + - + ${jobTracker} ${nameNode} eu.dnetlib.doiboost.orcid.OrcidDownloader - -d${workingPathOrcid}/ + -w${workingPath}/ -n${nameNode} - -flast_modified.csv - -odownload/ + -flast_modified.csv.tar + -odownloads/ -t${token} + + + + yarn-cluster + cluster + GenLastModifiedSeq + eu.dnetlib.doiboost.orcid.SparkGenLastModifiedSeq + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} 
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -flast_modified.csv.tar + -olast_modified.seq + -t- + + + + + + + + yarn-cluster + cluster + DownloadOrcidAuthors + eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors + dhp-doiboost-${projectVersion}.jar + + --num-executors=${sparkExecutorNumber} + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + + -w${workingPath}/ + -n${nameNode} + -flast_modified.seq + -odownloads/updated_authors + -t${token} + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 774475626..d6ce99f1c 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -5,17 +5,24 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.*; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; import java.util.List; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import 
org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; +import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -37,12 +44,49 @@ public class OrcidClientTest { // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - public void downloadTest() throws Exception { - String record = testDownloadRecord("0000-0001-6163-2042"); - File f = new File("/tmp/downloaded_0000-0001-6163-2042.xml"); + private void multipleDownloadTest() throws Exception { + int toDownload = 1; + long start = System.currentTimeMillis(); + OrcidDownloader downloader = new OrcidDownloader(); + TarArchiveInputStream input = new TarArchiveInputStream( + new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); + TarArchiveEntry entry = input.getNextTarEntry(); + BufferedReader br = null; + StringBuilder sb = new StringBuilder(); + int rowNum = 0; + int entryNum = 0; + int modified = 0; + while (entry != null) { + br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput + String line; + while ((line = br.readLine()) != null) { + String[] values = line.toString().split(","); + List recordInfo = Arrays.asList(values); + String orcidId = recordInfo.get(0); + if (downloader.isModified(orcidId, recordInfo.get(3))) { + downloadTest(orcidId); + modified++; + } + rowNum++; + if (modified > toDownload) { + break; + } + } + entryNum++; + entry = input.getNextTarEntry(); + } + long end = System.currentTimeMillis(); + logToFile("start test: " + new Date(start).toString()); + logToFile("end test: " + new Date(end).toString()); + } + + @Test + private void downloadTest(String orcid) throws Exception { + String record = testDownloadRecord(orcid); + String filename = 
"/tmp/downloaded_".concat(orcid).concat(".xml"); + File f = new File(filename); OutputStream outStream = new FileOutputStream(f); IOUtils.write(record.getBytes(), outStream); - System.out.println("saved to tmp"); } private String testDownloadRecord(String orcidId) throws Exception { @@ -50,7 +94,9 @@ public class OrcidClientTest { HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d"); + logToFile("start connection: " + new Date(System.currentTimeMillis()).toString()); CloseableHttpResponse response = client.execute(httpGet); + logToFile("end connection: " + new Date(System.currentTimeMillis()).toString()); if (response.getStatusLine().getStatusCode() != 200) { System.out .println("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); @@ -62,7 +108,7 @@ public class OrcidClientTest { return new String(""); } -// @Test + // @Test private void testLambdaFileParser() throws Exception { try (BufferedReader br = new BufferedReader( new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) { @@ -108,7 +154,7 @@ public class OrcidClientTest { } } -// @Test + // @Test private void getRecordDatestamp() throws ParseException { Date toRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toRetrieveDate); Date toNotRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toNotRetrieveDate); @@ -126,7 +172,7 @@ public class OrcidClientTest { System.out.println(valueDt.toString()); } -// @Test + // @Test @Ignore private void testModifiedDate() throws ParseException { testDate(toRetrieveDate); @@ -134,14 +180,81 @@ public class OrcidClientTest { testDate(shortDate); } -// @Test - @Ignore - private void testReadBase64CompressedRecord() throws Exception { + @Test + public void testReadBase64CompressedRecord() throws Exception { final String 
base64CompressedRecord = IOUtils - .toString(getClass().getResourceAsStream("0000-0001-6645-509X.compressed.base64")); + .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); - System.out.println(recordFromSeqFile); - final String downloadedRecord = testDownloadRecord("0000-0001-6645-509X"); + logToFile("\n\ndownloaded \n\n" + recordFromSeqFile); + final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161"); assertTrue(recordFromSeqFile.equals(downloadedRecord)); } + + @Test + private void lambdaFileReaderTest() throws Exception { + TarArchiveInputStream input = new TarArchiveInputStream( + new GzipCompressorInputStream(new FileInputStream("/develop/last_modified.csv.tar"))); + TarArchiveEntry entry = input.getNextTarEntry(); + BufferedReader br = null; + StringBuilder sb = new StringBuilder(); + int rowNum = 0; + int entryNum = 0; + while (entry != null) { + br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput + String line; + while ((line = br.readLine()) != null) { + String[] values = line.toString().split(","); + List recordInfo = Arrays.asList(values); + assertTrue(recordInfo.size() == 4); + + rowNum++; + if (rowNum == 1) { + assertTrue(recordInfo.get(3).equals("last_modified")); + } else if (rowNum == 2) { + assertTrue(recordInfo.get(0).equals("0000-0002-0499-7333")); + } + } + entryNum++; + assertTrue(entryNum == 1); + entry = input.getNextTarEntry(); + } + } + + @Test + private void lambdaFileCounterTest() throws Exception { + final String lastUpdate = "2020-09-29 00:00:00"; + OrcidDownloader downloader = new OrcidDownloader(); + TarArchiveInputStream input = new TarArchiveInputStream( + new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); + TarArchiveEntry entry = input.getNextTarEntry(); + BufferedReader br = null; + StringBuilder sb = new 
StringBuilder(); + int rowNum = 0; + int entryNum = 0; + int modified = 0; + while (entry != null) { + br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput + String line; + while ((line = br.readLine()) != null) { + String[] values = line.toString().split(","); + List recordInfo = Arrays.asList(values); + String orcidId = recordInfo.get(0); + if (downloader.isModified(orcidId, recordInfo.get(3))) { + modified++; + } + rowNum++; + } + entryNum++; + entry = input.getNextTarEntry(); + } + logToFile("rowNum: " + rowNum); + logToFile("modified: " + modified); + } + + private void logToFile(String log) + throws IOException { + log = log.concat("\n"); + Path path = Paths.get("/tmp/orcid_log.txt"); + Files.write(path, log.getBytes(), StandardOpenOption.APPEND); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-6645-509X.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-6645-509X.compressed.base64 deleted file mode 100644 index 1b088e061..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-6645-509X.compressed.base64 +++ /dev/null @@ -1 +0,0 @@ 
-H4sIAAAAAAAAAO1a227bOBB9z1cIepd18SW24aho0wTbAgEWjRdY9I2RaJtbSdSSkhP165eURIm6kHa2SbCLNkBiWDxzhhxyZg7tbN49xZFxhIQinFyZ7sQxDZgEOETJ/sr8Y3trLU2DZiAJQYQTeGUWkJrv/IsNgQEm4bp6MVKQHa5M22E/Fvt1rcViNrfmzupP02AOErpGSQZJAqIr85Bl6dq2Hx8fJ5gEKGR/93ZCbYEQFjDMA5CV01KZNBBhEyKaoSTQW0mgxg6mbCUgg6HGrMEIK5wdILESEEO1VYsRVjGMH1i8DyhVW7WYJhqEYKKJBB8W2ADHsS4A1bhAV1uoRlfjAp2yaWG2S1YIM4AiqrbrIwXDN1g8ah3WgGblMbPWrJwPN9in6gxZKIRJhnYI6mI2BAueXZ5UGaCyrQFNVAjcQcISB+oC0oKEHQhDAqnGpga0WXRE7ABaKaZIf8j7SMHAIvtNbcVHBfLA0gSTQg2uAe0+pREuYhZK3WYJjLD6OwcRC/2pTO/AhC2F5IgCTfLVgO7ZPXVim71hFYLFEOm2tMW02UQhIAFP+pxojm0X186QvSfwiOCjbpoNSNg95JFmV/lof36MgOKc6KI3gJr+hcF+NlX9WJdgKXmqURmRE+RzdsroW+qRLrGxJYsBDe8uvs6qBAzMDphmfuO2AZePq4XY2pVspISVM1zyJCMiHIAI+jDZ2COPa4dayk2dUSL1JEdiJCCwTAErhtkBh/5d2SiskonAcGOrgEMqmj/EiPK+b4Wsq/me464sZ2l53tadrmeLtXc58ZbLry1n32IQ8QjQzIqZeGBBDAWrx7Ztbrnu1puu59P11JksPfdrE/sRm5FlRwDFMPQzkkNpjfXTIZ4Jmoqv7A49s96gxjolKAak0LN0QfU+j+7kpiowdR3SiCZRieSTVplyIWEcEUUPKEIZK85p/hChwKzJxgRYSyJvVXk+2k0abv187rWb1EGP8o1u/QlW3dZLi24lxHqPjjAp1RT1twgkRb4Z6IwO6ATfDsQoKkqs/xmBETIZ0e6GLW2H9LgVe5I2pLqNlmCmLTF120Ovq2gZe9AOa3lEK0Gl5ag0lWxZ6xAhWPSLEqJFJqhFnVB/WnuB6c59qNbG5J5+XSN44aTZ0+qlftg2eEkPWDSPecprY9Aqg2fUyZnlTLfObD2brZ3pZHm5OLNOStOUbjfaWMi47la3XM39Sh/VBqXkaWTfiWPXwFRMte7W0giMiqMvjbVkA7CKtb2yafkkmIpJ0ndaKhmn4uroZi1bF6niG2jCs2pRi1bx1kpdyyYwKg5+edESlABFP3zplOxPbk9wnnaHX9u9zC9VPjpEKZDjQAXYyooU+iFGzfwGg8+iO4Ioh77rTFzXWdnvr69v7u8nPCYTb7X0PNcZ9VNZPctRgknMjv53GBoZAQlF5Q2Wiz2zcQ8Cdu7oafct1/PmwDp1c1FiISyvSc9dOud4llMCoyrZWTHyKYx2o7Qd1PjJGTEbOYkjqJGjuOFJWqZy22XzzApwyG6qly67kCxWjnkqy+0WOSaWWe9LI1BYKAnhE1PNpj4lelqZp+XUmjpbz1szYTt3JjP38hyt3Od9raSXfVR19/TBqHBWEPHjr8192Wr8gl+RSJuzWi5nlrtyp+P3fJ2H3t1/yNS9++uoTn4eMGpsPztAvZCWd4Rrgillt/Q+XfcCoXGsAJXZkqEsOmOLK9g9K1CR9ZFdnBN+kzdu2WnNCTTuQEbQk3HNMp3VvlIXGnflZwfGDhPjI6y+FDC+wBQyJnbHMm7Ze0iMO3yElba7JTg2biIYZATzzzXSA4jwnoDYuEd7lvK0WZRmyhv71KLOb2oK9Hnn5YWam4ryVRqcytlbNznVPF690akcv1SzK/nPangq5An99W8jpIxKXSP4Gf2LlRI+CUAyFERQZJry+DZFuOyb1eeJ6pYjWxRM95fNrJlf+UQfpPPcVOsRS6nKxKebmxvjfXl+60V1x0f
UyEBn9LS7rRfvP6rt64/GVlt3vnYXa8ebLJz5T6jt53ObB8OeLl2m2WZvJurP8fviav4cpz+BjF+4znzqzd3TMr5FvryMP5GBPyjjXyC/ZR+/ZPwvGd+Rzh8IQIl1jWOWVkyDf+L/PLMDATSuDyBJYGTdQ67DuYq/ZxUwg/vC+AAoq4fsyXuWtwVF1MA74+bIA/GFlwc2+BHSIgkOBCfoe1kvjC1OuYRPD4WBSi78DRq/szGu+H/p+ddqaiovb9bYVBN4veam8vj/l+6q0PwnNbu7OkOzy3bslxf3ZWNWPThpF4LC91or/va17gefq3e83v0GQZQdAkCgcZPsUQIhQcn+DW4NnbHyqwjxxaP2S0b/YmN3/tnSv/gH9+klwrUpAAA= \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 new file mode 100644 index 000000000..8dc3d32ad --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 @@ -0,0 +1 @@ +H4sIAAAAAAAAAO1dW5fbthF+z6/A2XPal5biRaREqmvlrG+JG6/t4900bd+4JLSCQxIqSa1X+fUFeIUkAiIlkpFs5jRxTQ1mgAEw881gQF7/+Ox74AmGEcLBiyt1pFwBGDjYRcHji6tf799K5hWIYjtwbQ8H8MXVBkZXP85/uA6hg0N3lv4BVna8fHElK+Qfifw7lsaKZkoTdaJeASIgiGYoiGEY2N6Lq2Ucr2ay/PXr1xEOHeSS/z7KQSTnFHkL6K4dO066xWtSkORtXBTFKHDErRiioh1ckZHYMXQFzQqavBWOlzCUAtuH/FYlTd7Kh/4D0fcSrfitSppCG2GIQ4Em6M85rYN9X6SA9PecOp1CPnX6e069It3CZJYkF8Y28iJ+u13KnMPvcPNVKDAjKEbuk9aCkdOfC9rndA1JyIVBjBYIinS2T5zzWayDdAfw2mYEhVZCuIAh2ThQpJCSKG9nu24II0GbjKDcRU+ILEBphSMkXuS7lDkHotnf+a3orznlkmwTHG74xBlBOU8rD298okrRZOU0eav/rW2PqP7QTt8iy9tGMHxCjmDzZQTba/fQii3mhlgIokMkmtKSptxNEbRDh276dShYttt0ZQ/J30P4hOBXUTcLorzdw9oTzCr9dbd/hEGE16FIe3ukV/MfAPnnOrUfs4SY2TzpryzFOkRzyj0i7EvWFV7iWmZa7LGh3mUuapUQ7DVb4iieF2IL4uRxOhBZOJJrZsOyO5yRxFJ42LE9OIfBtVzxOBMoZHmd7ah86zGC8l+cECZbQPJhvMTu/DZxFFLCKYTutcwj3GcVrR98FFG/L7nEq801RdUlxZK08b2mzDR9NlZHlmX9t+S522JP454dxZJPwANRoptz1RRJVSV1eq+NZwZhrIx0TflvofuKNhXD9mzkQ3ceh2vIjDF7uk9PAE3KL/EOO812fhS0XoXIt8ONmMs2UTbPlTN5nRqYzA4JQFNuiWpqWDUlZSqpk3vVnCnaTLNGxsSqqeGsSxSggCcUoQfkoZgY/dX6wUPOVdbJKmBXMmE7mKw7pmsTSdEl1Ugm35ypxshUpmXXtqgr+VUPWMxVNGBm0CU0mT2iJxgkKC2avwwJ2sV0F4uoDjBc2D7yNgnt/PWacIwr+LFE5YzIzJQwj0
sgyeDOSLSIGLIrmeG07Xp2PJaQ4w7pFtdk+adgTcgjxWtsywzj5GBIPKgcELEMMsCYI0th+5xmu+/7SLAKSorHVUHP2SNtb+ImYwCrdSyR+I74fVUxjYkyuRLs+9ojlQtmJLpaefZGQoELn4nl2NGByFaINcC3FV3rluWfIqH93/dpJMdDRD9ES9XUbItqoJQyKOZAkwzL1CTTMsfVeInHfQs/VXHZxk88Ngfx1F5DuZFCdtSX2L87B6/WEZDAGy+iiDfc5bltJavY2cSkhAkUwiF6RPQP5/g5qQ1ea03GYTDb/mQ00QdXh4naM08JcgcnJN7fUfKBLZULZ+yNFG9WxaK4WRNkG4J3rwtOe5S1eD7Z3hrO9SmZBFXVp4pSyS+lqsWQ+MY5E1RFSXdHhJBE5V/t0JXtpOevUxgwIuQ/pk/evX7BdOOvtr/6x8oO4wDSX24/mPcfbz7fVfaOiqzVtxB6SVxAc0vzCHqLSnZbVNt+psr8VzkaFtHU9a9FlMTi5OxhGWozkbkUrX0KvoWIoYzRj49Y1Jrwku0mk2cUIgeWbhsYlbyKTKcYgxRUTZAHO1zdmmnaSB2bDZAHOzOLBcERaeD5GOL1qqGjPrErnEUfyRVkha5K3ZarqcBI+tTSLGMP1ahigJQzlPPmFQhLbHB3oREbmVsUwChjvS406kPrrAwRRNqnO+SO2RYtu2SW9YlumWXV2DUnjeVGWqnCShx3fBgoHXLErEAXUo9EM7gpx1dL6BP7FW4KLrsUQnYh9qAUo9iD80/L0pEzj8VLLSaiBEuSpd2Q0JVupXJKkycH25F/6dIwi2bpg4PtXHsz14xSLfbmkPoPDKawbIFoF1YN2TxqyKp2zJDVJkMWD6VMND/aAfojMamHO5Esul8DlBxqxhuAF+C3DfZRUG5F/rpkGWWphnqb3iGi5u/t0PYRLO0yfVireQgf6eB++0+5BdIn9YTjdUC24PzXEhzmjw4bnIPDLGYXRbb/gB7Xia+pNyn12rOwUdfVSbmCajVpKj1x9amt+/zuw08/fXz/ukoiQ3ZYi02Vw5w9iEivZQFO2UXm9YFYm5htC5uY5H8j3TD+dMymVWSiGmA2rWXMtq+XEzFbC1pnZQyYbcBsR2C2l7azhB4OI+pl7xxEDzMBCsC+hA4RnaL3ieiUc0B0ynGITjGOQnTm+SG6e/hsR8COwc2aJk86R3Y7YhoDu/t/f/fATtXGY2VAdiVZ68hui67MBXIyikUp1oHj2oLs2JxiwxTqbkfbzCke0RWW/0F8WiiLUW8FQlXHmmFYk8Z5xYKnXHI4FaYeUE+LqcUjlc/KGGDqdwRTPy03EXKIlw9ccEPAJA6w30KakejKxU6MQ9sDn7OCFsI/wg4xMrAPwKpax6E3/Rj0pqgdANaT8dvH3z17iX27c+D2AYe+fQJw+/jLmQG3+vBC5IzaABglMqhGGFmNvBBe5DS8c8/dMnYhsz1iHtfyeoWQH0PG48TUYgtZsXQ8Xls17kJu25Q8fnv127Vq+0pqHt+sql7ILafh8aAXDYQMEoJG9XMWrQlVjHuFwoyZYY0svUn9HNO5o7Kgp4ln+bMo80DeoKQ8bDv3imNcjBpUzojYpbUxqjJSVd2StZGiqKqlGF9g1KSURiQhoGbcQ39AF8QEgkQouWZCK7Kv6sstmTQTntfzUGxIlJYgw9pCKytzRNJOrdApeFc0q/ITtVbdNd2Pya6tCGeMiaqa1tgSBi+0rVxSVtWc1igdZZW2m28X78BT8+2n729WRsNApkYpKduqVjmpKlmmqUuqpdYsJ2UlbMU0VZx6jmka1NqyQcurEEdRCBdtRzTJbhEED2x/UrI77NkheLW0w0di4z5DFz+Dtx7+Cl7aMdmYmxm4AfckEoI++LTEMYYedEhI5NBEu0MimZvVKsS2syz6Jwpd5EP9q++KONT1pr4ll8Rj2a5b4knpzTXxOtCde+JJbMtFJfxruSke5QGA1Fg1XHVkW5quwy
9k51PpJFRGjgfz3cRdooXNTNxcGli1ny8oLW8tMel48qGkpuCf6d+S85UlBG92TMwddhCMN9l4t5tW4Io9xCCIOQ+UKBwFyLfvfOiTkTYdnzcgh8htFZBrUkQWuWQaim7qk4nFrKMO4XhNqceC8X7hcXuT20m0pVjyp3/dvRqR5T4dmePJZKKbvYVcRPjqKXIOC7+MqW4jEiLrnRgefXw4EiopO4iExKbv1EjodMPKyjiDSKjxxTpWwomX61hWf2YkdMJFO5Zl08t2bNvv4Qju2MuIXVzA2+pYy+FsEqM+YS+2kQNCGK2IfYYUWAZ2TDrreRuAHWcdhih4BBEFmDhw1wTx0b+uvQVyIfBRAAlpCc6HkLYL388T1a//5/Xizwk1Ob05cgkch+t5LNvE9jwZPeF7nviLyy9wb/qmL09LrviSVfBiX3eHLvnWSUgQ25m9SEha2SvqujvPSNSpP20hI0FNC3j35s0boOvxEmx5lbsVdBBZYVEMXhUK+DugNhHQdpebpzD1885TdBHKKupE/jKKsOfbMfUoxkixRophHGPVjvFmjeRfRkDb3oR3lZjSLd0YK5apTPtMTB2QehmT22K2wqidraguN20/W2G2d7e9BfPKyhiyFUO2YshWNNbrhWcrwC8wIlYWxRA44SaKh6REDyiOJ613JMfryJCa6BLd8WT0hPB44r/51ESpuzZSE/3XSpQvj+g0M5HWa70hqnncgFubegfiFZK7KlkpF/Sol7jUHMSk5iuad/lfcg6CTalPRtOpQZxHL5HpbjpdLPwyAtT2prq7shhlbFhT09T7mOOaUi9jclvMPmi1sw/V72RpP/vAmr72sw9NDSsrY8g+DNmHIfvQWK8Xnn0YaiXarJU4CdjxRPUL7ni9GBISXQI+noyeQB9P/DefkCh1d6G1Ej1lJMoiCPA5/SojcSHxEqb1E5zSiYipnbjcZMW0hXeEX2IEq1uqppCtofZSJlFT6mVEsO1NbtuZqMxGus8j9sba7qmGTkDFSNG0Pia+sfxTLtpXMvwOLtGrE0U3FKvG1ZGSsoN0iJFcc58mxnU607XRpP43mFi+1abboKZb05OX1hozXR0Z4/7SIUN4nTSWG2mlKv/R7dvAmgfQb+2HMMNMFPO8cu8hiZ2JEcIusMPQ3kQAP8EQePRyPXkCbYKLCPJ4XNK9RncssXrZhXoX5m8PAgscArhYIPpa2xhEyakOTA98CIrMvpk9BNz9+y+e/N59GK8jx/kxHrfLjfCS90G7iAR2cRLoZU/k7DRYXiEk3ymWNlU03VR1RZko2rhRONf/+bLRTzRX73wZtBSzlb0QH9botQ9r9L4utpqdXmzt7zNIw2HNbuPhsOYw67NBk9/LYc2ANc8Ja353yHI44ukyI8qT0VNWlCf+cgOA2jWnue4us+b0rGKCSznG2csFTvXJeR/j2OEzeuoi1083y5cHJ4XwOIgphtfMqazJ6niijhZrzxut3MXflrHvtZ/wl2AYEvixZ9nq2SnaNgkS56bCvGueed6Ajw+jyH6E81d2EOAYFH0E5TdhQDJWkMzF7CT9bUlsZDD3lPd9HHNwenEGZ2YJBpvqMl0EtyrBXvQGg97L6+5Y0f4B0cO52NHnYmbtczGz83MxazZWR4bZ3ldyWvCFrIzhXGw4F6Nkb+zQ24Abj+y97JsSeAFIJ+kHGglSXDNo8mfsQ6IeO0IR/X7jz+uAlpfGdDAognYEwS3ZjUN9aaeuhyeoT/fD68O3cKTF6c2Ri+nYaIDHtNeIgNeJE6OCLbYnRgb7vM4wOkg6eVqEwJuKb2HHnZ5DEkyc/RCR1enEDNY4x+RQmQTu6+XtLxH28GPyznbmQ8yXmhWyWvhY56XFsuOpKmdTNVrhANJPFZjj8eQYU38Momgkf4hqj4xqVUX8vdcyqk0ou41qVbLh9JFptvii8NP3MitjiGqHqJaSJfEsMVyv8cqm1x7T8w8YYhKtktD1Fsb2A/ZQ5NNA9pY8pe
f19BLlTbTxVzH2iRFwwOeJrIFb5JQffB0i2o4cEE9a706I15EBae8U6xLN/0V7K9T9eWJttQbWZtv56YeVy5o2n/9h5RYw+qf3+A58/PDmYsG4qZ35N027PaL1wq1Y1NBkS1anujac0NbKwTRV33BAKxzkpQW1qqXLdA3cKupUnfb3napMrC8QO4SwR4ewNa4ElJQdh7DqTJuOpnqLVwJO94CsjCGEHULYpJAPPpNlv8jeu5Acy5LpCOj+osHs78gN4AY8evgBxyGycbTxHBiSaNdNo11vE2Gp+mcS89IS9Q3wh9i2Oz/EE9KXL+LJ/xYiWU5vzvaUtruggNeHb/aQtpsAIenjcEbb4Rktd94u5Ii2Ttqo3SPa92iFXPAZRkSes+whH7T1G2WRTfHW8/L/lgKus0sbs/SP+Q//BxvQAv4zvAAA \ No newline at end of file From 5c17e768b26789df1e8bf120eeebab93854a716e Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 23 Nov 2020 16:01:23 +0100 Subject: [PATCH 042/108] set wf configuration with spark.dynamicAllocation.maxExecutors 20 over 20 input partitions --- .../orcid/SparkDownloadOrcidAuthors.java | 39 +++++++++++++------ .../oozie_app/workflow.xml | 4 +- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 850a654d4..68f44541a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -65,9 +65,14 @@ public class SparkDownloadOrcidAuthors { spark -> { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsedRecords"); - LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("modifiedRecords"); - LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloadedRecords"); + LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records"); + LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("to_download_records"); + LongAccumulator downloadedRecordsAcc = 
spark.sparkContext().longAccumulator("downloaded_records"); + LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); + LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); + LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); + LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); + LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); logger.info("Retrieving data from lamda sequence file"); JavaPairRDD lamdaFileRDD = sc @@ -99,6 +104,18 @@ public class SparkDownloadOrcidAuthors { int statusCode = response.getStatusLine().getStatusCode(); downloaded.setStatusCode(statusCode); if (statusCode != 200) { + switch (statusCode) { + case 403: + errorHTTP403Acc.add(1); + case 409: + errorHTTP409Acc.add(1); + case 503: + errorHTTP503Acc.add(1); + case 525: + errorHTTP525Acc.add(1); + default: + errorHTTPGenericAcc.add(1); + } logger .info( "Downloading " + orcidId + " status code: " @@ -106,10 +123,6 @@ public class SparkDownloadOrcidAuthors { return downloaded.toTuple2(); } downloadedRecordsAcc.add(1); - long currentDownloaded = downloadedRecordsAcc.value(); - if ((currentDownloaded % 10000) == 0) { - logger.info("Current downloaded: " + currentDownloaded); - } downloaded .setCompressedData( ArgumentApplicationParser @@ -125,14 +138,11 @@ public class SparkDownloadOrcidAuthors { sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); logger.info("Start execution ..."); -// List> sampleList = lamdaFileRDD.take(500); -// JavaRDD> sampleRDD = sc.parallelize(sampleList); -// sampleRDD - JavaPairRDD authorsModifiedRDD = lamdaFileRDD - .filter(isModifiedAfterFilter); + JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); logger.info("Authors modified count: " + authorsModifiedRDD.count()); logger.info("Start downloading ..."); 
authorsModifiedRDD + .repartition(20) .map(downloadRecordFunction) .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( @@ -144,6 +154,11 @@ public class SparkDownloadOrcidAuthors { logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); + logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); + logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); + logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); + logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); + logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index 5f728d35b..1c2a7b588 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -149,9 +149,9 @@ eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors dhp-doiboost-${projectVersion}.jar - --num-executors=${sparkExecutorNumber} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 73dbb79602aa2665b8238bf08feae98d09028545 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:36:15 +0100 
Subject: [PATCH 043/108] removed the checl for the community name in the common version on MakeTar --- .../src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 4047fdca4..89fa09f3c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -90,9 +90,9 @@ public class MakeTarArchive implements Serializable { String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { String name = p_string.substring(p_string.lastIndexOf("/") + 1); - if (name.trim().equalsIgnoreCase("communities_infrastructures")) { - name = "communities_infrastructures.json"; - } +// if (name.trim().equalsIgnoreCase("communities_infrastructures")) { +// name = "communities_infrastructures.json"; +// } TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name); entry.setSize(fileStatus.getLen()); current_size += fileStatus.getLen(); From 1832708c42bfddf065ac19db28cf39ce6a328c87 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:37:36 +0100 Subject: [PATCH 044/108] modified boolean variable with string one whcih specify the type of dump we are performing: complete, community or funder --- .../dhp/oa/graph/dump/DumpProducts.java | 22 ++++++++----------- .../dhp/oa/graph/dump/ResultMapper.java | 8 +++---- .../community/SparkDumpCommunityProducts.java | 10 ++++++++- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index 1279ede53..d20a3036e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -11,17 +11,12 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; import eu.dnetlib.dhp.schema.oaf.*; /** @@ -33,7 +28,7 @@ public class DumpProducts implements Serializable { public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath, Class inputClazz, Class outputClazz, - boolean graph) { + String dumpType) { SparkConf conf = new SparkConf(); @@ -42,7 +37,7 @@ public class DumpProducts implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, graph); + execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType); }); } @@ -53,13 +48,13 @@ public class DumpProducts implements Serializable { String communityMapPath, Class inputClazz, Class outputClazz, - boolean graph) { + String dumpType) { CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); Utils .readPath(spark, inputPath, inputClazz) - .map((MapFunction) value -> execMap(value, communityMap, graph), Encoders.bean(outputClazz)) + .map((MapFunction) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz)) .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) @@ -70,18 +65,18 @@ public class DumpProducts implements Serializable { private 
static O execMap(I value, CommunityMap communityMap, - boolean graph) { + String dumpType) { Optional odInfo = Optional.ofNullable(value.getDataInfo()); if (odInfo.isPresent()) { - if (odInfo.get().getDeletedbyinference()) { + if (odInfo.get().getDeletedbyinference() || odInfo.get().getInvisible()) { return null; } } else { return null; } - if (!graph) { + if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)) { Set communities = communityMap.keySet(); Optional> inputContext = Optional @@ -102,7 +97,8 @@ public class DumpProducts implements Serializable { return null; } } - return (O) ResultMapper.map(value, communityMap, graph); + + return (O) ResultMapper.map(value, communityMap, dumpType); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 4c3bc0dd5..cb052ebaa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -21,10 +21,10 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class ResultMapper implements Serializable { public static Result map( - E in, Map communityMap, boolean graph) { + E in, Map communityMap, String dumpType) { Result out; - if (graph) { + if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { out = new GraphResult(); } else { out = new CommunityResult(); @@ -217,7 +217,7 @@ public class ResultMapper implements Serializable { .ofNullable(input.getInstance()); if (oInst.isPresent()) { - if (graph) { + if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((GraphResult) out) .setInstance(oInst.get().stream().map(i -> getGraphInstance(i)).collect(Collectors.toList())); } else { @@ -296,7 +296,7 @@ public class ResultMapper implements Serializable { out.setType(input.getResulttype().getClassid()); } - if 
(!graph) { + if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((CommunityResult) out) .setCollectedfrom( input diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java index c4b89936f..63970d14b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java @@ -4,12 +4,15 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import java.io.Serializable; import java.util.*; +import javax.swing.text.html.Option; + import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; +import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Result; @@ -48,6 +51,11 @@ public class SparkDumpCommunityProducts implements Serializable { String communityMapPath = parser.get("communityMapPath"); + final String dumpType = Optional + .ofNullable(parser.get("dumpType")) + .map(String::valueOf) + .orElse("community"); + Class inputClazz = (Class) Class.forName(resultClassName); DumpProducts dump = new DumpProducts(); @@ -55,7 +63,7 @@ public class SparkDumpCommunityProducts implements Serializable { dump .run( isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class, - false); + dumpType); } From 44db258dc4324d03be64091e114a885c75a9a752 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:38:06 +0100 Subject: [PATCH 045/108] added enumerated for the dump type --- .../eu/dnetlib/dhp/oa/graph/dump/Constants.java | 14 
++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java index a466cf074..86a275ae2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java @@ -40,4 +40,18 @@ public class Constants { coarCodeLabelMap.put("c_14cb", "CLOSED"); coarCodeLabelMap.put("c_f1cf", "EMBARGO"); } + + public enum DUMPTYPE { + COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder"); + + private String type; + + DUMPTYPE(String type) { + this.type = type; + } + + public String getType() { + return type; + } + } } From 00c377dac2b53380b24fe75fd908aed5676c15cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:40:22 +0100 Subject: [PATCH 046/108] added specification of MapFunction types in map --- .../dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 1276d8495..2b80b1d86 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -8,6 +8,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -20,6 +21,7 @@ import 
com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import scala.Tuple2; public class SparkUpdateProjectInfo implements Serializable { @@ -73,7 +75,7 @@ public class SparkUpdateProjectInfo implements Serializable { .joinWith( resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left") - .map(value -> { + .map((MapFunction, CommunityResult>) value -> { CommunityResult r = value._1(); Optional.ofNullable(value._2()).ifPresent(rp -> { r.setProjects(rp.getProjectsList()); From 3319440c5348ec03399f7b4e77c7e165ee3de882 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:41:09 +0100 Subject: [PATCH 047/108] changed the direction of the relation between projects and result considered to select the results linked to projects --- .../graph/dump/community/SparkPrepareResultProject.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 612af6d16..2d43888b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -76,16 +76,16 @@ public class SparkPrepareResultProject implements Serializable { private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) { Dataset relation = Utils .readPath(spark, inputPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); + .filter("dataInfo.deletedbyinference = false and lower(relClass) = 
'isproducedby'"); Dataset projects = Utils .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); projects - .joinWith(relation, projects.col("id").equalTo(relation.col("source"))) + .joinWith(relation, projects.col("id").equalTo(relation.col("target")), "inner") .groupByKey( (MapFunction, String>) value -> value ._2() - .getTarget(), + .getSource(), Encoders.STRING()) .mapGroups( (MapGroupsFunction, ResultProject>) (s, @@ -93,7 +93,7 @@ public class SparkPrepareResultProject implements Serializable { Set projectSet = new HashSet<>(); Tuple2 first = it.next(); ResultProject rp = new ResultProject(); - rp.setResultId(first._2().getTarget()); + rp.setResultId(s); eu.dnetlib.dhp.schema.oaf.Project p = first._1(); projectSet.add(p.getId()); Project ps = getProject(p); From 62928ef7a5a68c8c45fb9f7662b4bb25153b908d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:42:41 +0100 Subject: [PATCH 048/108] changed to save the communities_infrastructures information as the other entity dumps: in a json.gz file --- .../dump/complete/CreateContextEntities.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index 23224f8db..ccb84c713 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -1,22 +1,25 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; +import java.io.*; import java.nio.charset.StandardCharsets; import java.util.function.Consumer; import 
java.util.function.Function; +import org.apache.commons.crypto.utils.IoUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; @@ -76,8 +79,11 @@ public class CreateContextEntities implements Serializable { } else { fsDataOutputStream = fileSystem.create(hdfsWritePath); } + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream), + StandardCharsets.UTF_8)); } From 7e940f1991c1755d2ec51b11e85675dee212b5f7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:43:34 +0100 Subject: [PATCH 049/108] changed to consider the modification for the specification of the type of dump --- .../dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 773068dfb..36ced3a4a 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -49,7 +49,7 @@ public class DumpGraphEntities implements Serializable { d .run( isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class, - true); + eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType()); break; case "40": runWithSparkSession( From b9b6bdb2e67f99d14abeaca83783d3450b015fb4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:44:53 +0100 Subject: [PATCH 050/108] fixing issue on previous implementation --- .../funderresults/SparkDumpFunderResults.java | 55 +++++++++++++------ .../SparkResultLinkedToProject.java | 19 +++++-- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index acafa968a..740a4245d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -9,10 +9,7 @@ import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,6 +19,7 @@ import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; 
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -69,11 +67,12 @@ public class SparkDumpFunderResults implements Serializable { }); } - private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, String relationPath) { + private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath, + String relationPath) { Dataset relation = Utils .readPath(spark, relationPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); + .filter("dataInfo.deletedbyinference = false and relClass = 'isProducedBy'"); Dataset result = Utils .readPath(spark, inputPath + "/publication", CommunityResult.class) @@ -81,18 +80,40 @@ public class SparkDumpFunderResults implements Serializable { .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class)) .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); - result - .joinWith(relation, result.col("id").equalTo(relation.col("target")), "inner") - .map((MapFunction, FunderResults>) value -> { - FunderResults res = (FunderResults) value._1(); - res.setFunder_id(value._2().getSource().substring(3, 15)); - return res; - }, Encoders.bean(FunderResults.class)) - .write() - .partitionBy("funder_id") - .mode(SaveMode.Overwrite) - .json(outputPath); + List funderList = relation + .select("target") + .map((MapFunction) value -> value.getString(0).substring(0, 15), Encoders.STRING()) + .distinct() + .collectAsList(); + +// Dataset results = result +// .joinWith(relation, result.col("id").equalTo(relation.col("target")), "inner") +// .map((MapFunction, CommunityResult>) value -> { +// return value._1(); +// }, Encoders.bean(CommunityResult.class)); + + funderList.forEach(funder -> writeFunderResult(funder, result, outputPath)); } + private 
static void writeFunderResult(String funder, Dataset results, String outputPath) { + + results.map((MapFunction) r -> { + if (!Optional.ofNullable(r.getProjects()).isPresent()) { + return null; + } + for (Project p : r.getProjects()) { + if (p.getId().startsWith(funder)) { + return r; + } + } + return null; + }, Encoders.bean(CommunityResult.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + funder); + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 6d059567c..e5d7e2254 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -10,6 +10,7 @@ import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -70,15 +71,25 @@ public class SparkResultLinkedToProject implements Serializable { private static void writeResultsLikedToProjects(SparkSession spark, Class inputClazz, String inputPath, String outputPath, String relationPath) { - Dataset results = Utils.readPath(spark, inputPath, inputClazz); + Dataset results = Utils + .readPath(spark, inputPath, inputClazz) + .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); Dataset relations = Utils .readPath(spark, relationPath, Relation.class) - .filter("dataInfo.deletedbyinference = false and relClass = 
'produces'"); + .filter("dataInfo.deletedbyinference = false and lower(relClass) = 'isproducedby'"); + relations .joinWith( - results, relations.col("target").equalTo(results.col("id")), + results, relations.col("source").equalTo(results.col("id")), "inner") - .map((MapFunction, R>) t2 -> t2._2(), Encoders.bean(inputClazz)) + .groupByKey( + (MapFunction, String>) value -> value + ._2() + .getId(), + Encoders.STRING()) + .mapGroups((MapGroupsFunction, R>) (k, it) -> { + return it.next()._2(); + }, Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") From 35ecea8842886c9952a4b3abfdffdf4514e01b2f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:45:15 +0100 Subject: [PATCH 051/108] changed to consider the modification for the specification of the type of dump --- .../dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 54a610b9c..271ae8fc1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -8,7 +8,6 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.FileUtils; -import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -23,7 +22,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.schema.dump.oaf.Result; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult; import 
eu.dnetlib.dhp.schema.oaf.Dataset; @@ -155,7 +153,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - CommunityResult.class, false); + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -220,7 +218,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - GraphResult.class, true); + GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -253,7 +251,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - CommunityResult.class, false); + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -285,7 +283,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, false); + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -321,7 +319,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class, - CommunityResult.class, false); + CommunityResult.class, 
Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -357,7 +355,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class, - CommunityResult.class, false); + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -392,7 +390,7 @@ public class DumpJobTest { .run( // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, false); + CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); From 54a309bb6bec5a7b7629c2257485579637e18b96 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:45:30 +0100 Subject: [PATCH 052/108] refactoring --- .../graph/dump/complete/CreateEntityTest.java | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 411e6f4b0..702811549 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -3,13 +3,25 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static org.mockito.Mockito.lenient; +import java.io.BufferedWriter; import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; 
+import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.function.Consumer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -60,6 +72,8 @@ public class CreateEntityTest { private QueryInformationSystem queryInformationSystem; + private static String workingDir; + @BeforeEach public void setUp() throws ISLookUpException { lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap); @@ -67,6 +81,13 @@ public class CreateEntityTest { queryInformationSystem.setIsLookUp(isLookUpService); } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.oa.graph.dump.complete.CreateEntityTest.class.getSimpleName()) + .toString(); + } + @Test public void test1() throws ISLookUpException, IOException { List cInfoList = new ArrayList<>(); @@ -123,4 +144,30 @@ public class CreateEntityTest { riList.forEach(c -> System.out.println(new Gson().toJson(c))); } + + @Test + public void test2() throws IOException, ISLookUpException { + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + + Path hdfsWritePath = new Path(workingDir + "/prova"); + FSDataOutputStream fsDataOutputStream = null; + if (fs.exists(hdfsWritePath)) { + fsDataOutputStream = fs.append(hdfsWritePath); + } else { + fsDataOutputStream = fs.create(hdfsWritePath); + } + CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf()); + 
CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream), + StandardCharsets.UTF_8)); + + List cInfoList = new ArrayList<>(); + final Consumer consumer = ci -> cInfoList.add(ci); + queryInformationSystem.getContextInformation(consumer); + + List riList = new ArrayList<>(); + cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo))); + + } } From c167a18057f3b5326c76f73ae144cb64c5347762 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:45:50 +0100 Subject: [PATCH 053/108] added new parameter for the dumpType --- .../eu/dnetlib/dhp/oa/graph/dump/input_parameters.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json index b1f4c026a..ca12ae3ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json @@ -29,7 +29,12 @@ "paramLongName":"resultTableName", "paramDescription": "the name of the result table we are currently working on", "paramRequired": true - } + },{ + "paramName":"dt", + "paramLongName":"dumpType", + "paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project", + "paramRequired": false +} ] From 7e14452a87ba88657abda8f41ea6731ee5751502 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:46:34 +0100 Subject: [PATCH 054/108] final version of the wf to get the dump of results associated to at least one funder per funder --- 
.../dump/funderresults/oozie_app/workflow.xml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml index d0acdf051..d258d4dd4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml @@ -182,7 +182,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${workingDir}/result/dataset --relationPath${sourcePath}/relation @@ -208,7 +208,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${workingDir}/result/otherresearchproduct --relationPath${sourcePath}/relation @@ -234,7 +234,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${workingDir}/result/software --relationPath${sourcePath}/relation @@ -269,9 +269,11 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/publication + --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/dump/publication --communityMapPath${workingDir}/communityMap + --dumpTypefunder @@ -295,9 +297,11 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/dataset + 
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${workingDir}/dump/dataset --communityMapPath${workingDir}/communityMap + --dumpTypefunder @@ -321,9 +325,11 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/otherresearchproduct + --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${workingDir}/dump/otherresearchproduct --communityMapPath${workingDir}/communityMap + --dumpTypefunder @@ -347,9 +353,11 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/software + --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${workingDir}/dump/software --communityMapPath${workingDir}/communityMap + --dumpTypefunder @@ -508,7 +516,7 @@ --sourcePath${workingDir}/ext --outputPath${outputPath} - --relationPath${sourcePath}/relation + --relationPath${sourcePath} From 39f4a2087300f90bc64cbe928655c13bb0dcc2ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 14:47:32 +0100 Subject: [PATCH 055/108] changed the path and the name for saving the communities_infrastructures dump file --- .../dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml index b809b58fb..8189e2594 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/oozie_app/workflow.xml @@ -368,7 +368,7 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities - --hdfsPath${workingDir}/collect/communities_infrastructures + --hdfsPath${workingDir}/collect/communities_infrastructures/communities_infrastructure.json.gz 
--nameNode${nameNode} --isLookUpUrl${isLookUpUrl} From 72bb0fe360747bed534890b72f4142f165a6e8e4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 24 Nov 2020 16:47:07 +0100 Subject: [PATCH 056/108] changed directory name --- .../dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 740a4245d..232459c19 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -77,7 +77,7 @@ public class SparkDumpFunderResults implements Serializable { Dataset result = Utils .readPath(spark, inputPath + "/publication", CommunityResult.class) .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class)) + .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class)) .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); List funderList = relation From 99a086f0c68f17489e4b39e32e9fbbb24418b21d Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 24 Nov 2020 17:49:32 +0100 Subject: [PATCH 057/108] max concurrent executors set to 10, according to ORCID Director of Technology mail request --- .../orcid/SparkDownloadOrcidAuthors.java | 17 +++++-- .../oozie_app/workflow.xml | 6 +-- .../doiboost/orcid/OrcidClientTest.java | 47 +++++++++++++++++-- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 68f44541a..598835a00 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -100,7 +100,13 @@ public class SparkDownloadOrcidAuthors { HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + long startReq = System.currentTimeMillis(); CloseableHttpResponse response = client.execute(httpGet); + long endReq = System.currentTimeMillis(); + long reqTime = endReq - startReq; + if (reqTime < 1000) { + Thread.sleep(1000 - reqTime); + } int statusCode = response.getStatusLine().getStatusCode(); downloaded.setStatusCode(statusCode); if (statusCode != 200) { @@ -111,15 +117,16 @@ public class SparkDownloadOrcidAuthors { errorHTTP409Acc.add(1); case 503: errorHTTP503Acc.add(1); + throw new RuntimeException("Orcid request rate limit reached (HTTP 503)"); case 525: errorHTTP525Acc.add(1); default: errorHTTPGenericAcc.add(1); + logger + .info( + "Downloading " + orcidId + " status code: " + + response.getStatusLine().getStatusCode()); } - logger - .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); return downloaded.toTuple2(); } downloadedRecordsAcc.add(1); @@ -142,7 +149,7 @@ public class SparkDownloadOrcidAuthors { logger.info("Authors modified count: " + authorsModifiedRDD.count()); logger.info("Start downloading ..."); authorsModifiedRDD - .repartition(20) + .repartition(10) .map(downloadRecordFunction) .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( diff --git 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml index 1c2a7b588..b9383558c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_updates_download/oozie_app/workflow.xml @@ -14,10 +14,6 @@ the shell command that downloads the lambda file from orcid containing last orcid update informations - - sparkExecutorNumber - 20 - sparkDriverMemory 7G @@ -35,7 +31,7 @@ spark2MaxExecutors - 20 + 10 oozieActionShareLibForSpark2 diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index d6ce99f1c..66a7badb7 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -10,6 +10,9 @@ import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.temporal.TemporalUnit; import java.util.Arrays; import java.util.Date; import java.util.List; @@ -24,6 +27,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull; import org.junit.jupiter.api.Test; +import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import jdk.nashorn.internal.ir.annotations.Ignore; @@ -45,7 +49,7 @@ public class OrcidClientTest { @Test private void multipleDownloadTest() throws Exception { - int 
toDownload = 1; + int toDownload = 10; long start = System.currentTimeMillis(); OrcidDownloader downloader = new OrcidDownloader(); TarArchiveInputStream input = new TarArchiveInputStream( @@ -64,7 +68,7 @@ public class OrcidClientTest { List recordInfo = Arrays.asList(values); String orcidId = recordInfo.get(0); if (downloader.isModified(orcidId, recordInfo.get(3))) { - downloadTest(orcidId); + slowedDownDownload(orcidId); modified++; } rowNum++; @@ -181,7 +185,7 @@ public class OrcidClientTest { } @Test - public void testReadBase64CompressedRecord() throws Exception { + private void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); @@ -257,4 +261,41 @@ public class OrcidClientTest { Path path = Paths.get("/tmp/orcid_log.txt"); Files.write(path, log.getBytes(), StandardOpenOption.APPEND); } + + @Test + private void slowedDownDownloadTest() throws Exception { + String orcid = "0000-0001-5496-1243"; + String record = slowedDownDownload(orcid); + String filename = "/tmp/downloaded_".concat(orcid).concat(".xml"); + File f = new File(filename); + OutputStream outStream = new FileOutputStream(f); + IOUtils.write(record.getBytes(), outStream); + } + + private String slowedDownDownload(String orcidId) throws Exception { + try (CloseableHttpClient client = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); + httpGet.addHeader("Accept", "application/vnd.orcid+xml"); + httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d"); + long start = System.currentTimeMillis(); + CloseableHttpResponse response = client.execute(httpGet); + long endReq = System.currentTimeMillis(); + long reqSessionDuration = endReq - start; + logToFile("req time (millisec): " + 
reqSessionDuration); + if (reqSessionDuration < 1000) { + logToFile("wait ...."); + Thread.sleep(1000 - reqSessionDuration); + } + long end = System.currentTimeMillis(); + long total = end - start; + logToFile("total time (millisec): " + total); + if (response.getStatusLine().getStatusCode() != 200) { + logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode()); + } + return IOUtils.toString(response.getEntity().getContent()); + } catch (Throwable e) { + e.printStackTrace(); + } + return new String(""); + } } From b37b9352d7cd96febbe2c9aab4a5f22aef43d675 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:41:08 +0100 Subject: [PATCH 058/108] added constant value for semantic relationship between projects and results --- .../dnetlib/dhp/oa/graph/dump/Constants.java | 2 ++ .../dump/funderresults/FunderResults.java | 18 ----------- .../ResultLinkedToProjectTest.java | 4 +++ .../dump/funderresult/SplitPerFunderTest.java | 4 +++ .../dump/funderresource/extendeddump/dataset | 0 .../dump/funderresource/extendeddump/orp | 0 .../funderresource/extendeddump/publication | 12 +++++++ .../dump/funderresource/extendeddump/relation | 8 +++++ .../dump/funderresource/extendeddump/software | 0 .../dump/funderresource/match/papers.json | 32 +++++++++++++++++++ .../dump/funderresource/match/relations.json | 30 +++++++++++++++++ .../dump/funderresource/nomatch/papers.json | 32 +++++++++++++++++++ .../funderresource/nomatch/relations.json | 30 +++++++++++++++++ 13 files changed, 154 insertions(+), 18 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java create mode 100644 
dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/dataset create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/orp create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/software create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java index 86a275ae2..c64554555 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java @@ -26,6 +26,8 @@ public class Constants { public static String ORCID = "orcid"; + public static String RESULT_PROJECT_IS_PRODUCED_BY = "isProducedBy"; + static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java deleted file mode 100644 index 138e262e4..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/FunderResults.java +++ /dev/null @@ -1,18 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.funderresults; - -import java.io.Serializable; - -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; - -public class FunderResults extends CommunityResult implements Serializable { - private String funder_id; - - public String getFunder_id() { - return funder_id; - } - - public void setFunder_id(String funder_id) { - this.funder_id = funder_id; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java new file mode 100644 index 000000000..3696db7f8 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.oa.graph.dump.funderresult; + +public class ResultLinkedToProjectTest { +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java new file mode 100644 index 000000000..8e49b9fc6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -0,0 +1,4 @@ +package eu.dnetlib.dhp.oa.graph.dump.funderresult; + +public class DumpResultPerFunderTest { +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/dataset 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/dataset new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/orp b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/orp new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication new file mode 100644 index 000000000..901b34abc --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/publication @@ -0,0 +1,12 @@ +{"author":[{"fullname":"Eric A. Wright","name":"Eric A.","pid":{"id":{"scheme":"orcid","value":"0000-0001-8604-7026"}},"rank":1,"surname":"Wright"},{"fullname":"Christopher D. d’Esterre","name":"Christopher D.","rank":2,"surname":"d’Esterre"},{"fullname":"Laura B. 
Morrison","name":"Laura B.","rank":3,"surname":"Morrison"},{"fullname":"Neil Cockburn","name":"Neil","rank":4,"surname":"Cockburn"},{"fullname":"Michael Kovacs","name":"Michael","rank":5,"surname":"Kovacs"},{"fullname":"Ting-Yim Lee","name":"Ting-Yim","rank":6,"surname":"Lee"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"}],"context":[{"code":"ni","label":"Neuroinformatics","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.8"}]}],"contributor":["Jiang, Quan"],"country":[],"coverage":[],"dateofcollection":"","description":["CT Perfusion (CTP) derived cerebral blood flow (CBF) thresholds have been proposed as the optimal parameter for distinguishing the infarct core prior to reperfusion. Previous threshold-derivation studies have been limited by uncertainties introduced by infarct expansion between the acute phase of stroke and follow-up imaging, or DWI lesion reversibility. In this study a model is proposed for determining infarction CBF thresholds at 3hr ischemia time by comparing contemporaneously acquired CTP derived CBF maps to 18F-FFMZ-PET imaging, with the objective of deriving a CBF threshold for infarction after 3 hours of ischemia. Endothelin-1 (ET-1) was injected into the brain of Duroc-Cross pigs (n = 11) through a burr hole in the skull. 
CTP images were acquired 10 and 30 minutes post ET-1 injection and then every 30 minutes for 150 minutes. 370 MBq of 18F-FFMZ was injected ~120 minutes post ET-1 injection and PET images were acquired for 25 minutes starting ~155-180 minutes post ET-1 injection. CBF maps from each CTP acquisition were co-registered and converted into a median CBF map. The median CBF map was co-registered to blood volume maps for vessel exclusion, an average CT image for grey/white matter segmentation, and 18F-FFMZ-PET images for infarct delineation. Logistic regression and ROC analysis were performed on infarcted and non-infarcted pixel CBF values for each animal that developed infarct. Six of the eleven animals developed infarction. The mean CBF value corresponding to the optimal operating point of the ROC curves for the 6 animals was 12.6 ± 2.8 mL·min-1·100g-1 for infarction after 3 hours of ischemia. The porcine ET-1 model of cerebral ischemia is easier to implement then other large animal models of stroke, and performs similarly as long as CBF is monitored using CTP to prevent reperfusion."],"format":[],"id":"50|dedup_wf_001::0017bb7bb6a0e2f5624db2721a1bc1a5","instance":[{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"http://creativecommons.org/licenses/by/4.0","publicationdate":"2016-06-27","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1371/journal.pone.0158157"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"hostedby":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed 
Central"},"publicationdate":"2016-06-01","refereed":"UNKNOWN","type":"Article","url":["http://europepmc.org/articles/PMC4922566"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::830e55b42c4aaa815c19cfa4f2e5855e","value":"PLoS ONE"},"license":"http://creativecommons.org/licenses/by/4.0/","publicationdate":"2016-06-27","refereed":"UNKNOWN","type":"Article","url":["http://dx.plos.org/10.1371/journal.pone.0158157","http://dx.doi.org/10.1371/journal.pone.0158157"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::830e55b42c4aaa815c19cfa4f2e5855e","value":"PLoS ONE"},"license":"cc-by","refereed":"UNKNOWN","type":"Article","url":["https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0158157&type=printable"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::830e55b42c4aaa815c19cfa4f2e5855e","value":"PLoS 
ONE"},"refereed":"UNKNOWN","type":"Article","url":["https://www.ncbi.nlm.nih.gov/pubmed/27347877","http://ui.adsabs.harvard.edu/abs/2016PLoSO..1158157W/abstract","https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0158157","https://paperity.org/p/80470187/absolute-cerebral-blood-flow-infarction-threshold-for-3-hour-ischemia-time-determined","https://dx.plos.org/10.1371/journal.pone.0158157","https://academic.microsoft.com/#/detail/2460634846"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::830e55b42c4aaa815c19cfa4f2e5855e","value":"PLoS ONE"},"publicationdate":"2016-01-01","refereed":"UNKNOWN","type":"Article","url":["http://europepmc.org/articles/PMC4922566?pdf=render","https://doaj.org/toc/1932-6203"]},{"collectedfrom":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2016-06-27T17:32:46Z","refereed":"UNKNOWN","type":"UNKNOWN","url":["https://dx.doi.org/10.1371/journal.pone.0158157"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1603727328518,"maintitle":"Absolute Cerebral Blood Flow Infarction Threshold for 3-Hour Ischemia Time Determined with CT Perfusion and 18F-FFMZ-PET Imaging in a Porcine Model of Cerebral Ischemia","originalId":["10.1371/journal.pone.0158157","oai:pubmedcentral.nih.gov:4922566","2460634846","oai:doaj.org/article:a0a0f971217e4277aae02c2ddb0243ed","27347877"],"pid":[{"scheme":"doi","value":"10.1371/journal.pone.0158157"},{"scheme":"pmc","value":"PMC4922566"},{"scheme":"pmid","value":"27347877"}],"projects":[{"code":"unidentified","funder":{"jurisdiction":"CA","name":"Canadian Institutes of Health 
Research","shortName":"CIHR"},"id":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"unidentified"}],"publicationdate":"2016-06-27","publisher":"Public Library of Science (PLoS)","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Research Article"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Medicine and Health Sciences"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Diagnostic Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Signs and Symptoms"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Infarction"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Pathology and Laboratory Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Critical Care and Emergency Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Reperfusion"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Research and Analysis Methods"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Imaging Techniques"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Neuroimaging"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Positron Emission Tomography"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Biology and Life 
Sciences"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Neuroscience"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Diagnostic Radiology"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Tomography"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Radiology and Imaging"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Neurology"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Cerebral Ischemia"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Vascular Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Ischemia"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Cerebrovascular Diseases"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Stroke"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Ischemic Stroke"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Computed Axial Tomography"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Model Organisms"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Animal Models"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Pig Models"}},{"subject":{"scheme":"keywords","value":"General Biochemistry, Genetics and Molecular Biology"}},{"subject":{"scheme":"keywords","value":"General Agricultural and Biological Sciences"}},{"subject":{"scheme":"keywords","value":"General 
Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"R"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Science"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Q"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.8487"},"subject":{"scheme":"mesheuropmc","value":"cardiovascular system"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7884"},"subject":{"scheme":"mesheuropmc","value":"cardiovascular diseases"}}],"type":"publication"} +{"author":[{"fullname":"NIOO-KNAW KNAW","name":"NIOO-KNAW","pid":{"id":{"scheme":"orcid","value":"0000-0002-3835-159X"}},"rank":1,"surname":"KNAW"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::e820a45f1dfc7b95282d10b6087e11c0","value":"Repositorio Institucional de la Universidad de Alicante"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},{"key":"10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de","value":"NARCIS"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.8"}]}],"contributor":["Universidad de Alicante. Departamento de Ciencias Ambientales y Recursos Naturales","Universidad de Alicante. 
Centro Iberoamericano de la Biodiversidad","Biodiversidad y Biotecnología aplicadas a la Biología de la Conservación","Terrestrial Ecology (TE)"],"country":[{"code":"NL","label":"Netherlands","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}},{"code":"ES","label":"Spain","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2018-10-03T06:57:46.116Z","description":["Herbivory affects subsequent herbivores, mainly regulated by the phytohormones jasmonic (JA) and salicylic acid (SA). Additionally, organisms such as soil microbes belowground or parasitoids that develop inside their herbivorous hosts aboveground, can change plant responses to herbivory. However, it is not yet well known how organisms of trophic levels other than herbivores, below- and above-ground, alter the interactions between insect species sharing a host plant. Here, we investigated whether the parasitoid Aphidius colemani and different soil microbial communities (created through plant-soil feedbacks) affect the JA and SA signalling pathways in response to the aphid Myzus persicae and the thrips Frankliniella occidentalis, as well as subsequent thrips performance. Our results show that the expression of the JA-responsive gene CaPINII in sweet pepper was more suppressed by aphids than by parasitised aphids. However, parasitism did not affect the expression of CaPAL1, a biosynthetic gene of SA. Furthermore, aphid feeding enhanced thrips performance compared with uninfested plants, but this was not observed when aphids were parasitised. Soils where different plant species were previously grown, did not affect plant responses or the interaction between herbivores. 
Our study shows that members of the third trophic level can modify herbivore interactions by altering plant physiology.\nThis work and T.V.’s activities were supported by Spanish Ministry of Science and Innovation, (CGL2016-79054), the Open Technology Program of NWO (TTW-13848), and the STSM Cost Action FA1405 funded by the European Union. A.P. research activities were supported by the Netherlands Organisation for Scientific Research (NWO, project no. 870.15.080)."],"format":["application/pdf","application/octet-stream"],"id":"50|dedup_wf_001::01644aebfe64e809f593492e515bc070","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::e820a45f1dfc7b95282d10b6087e11c0","value":"Repositorio Institucional de la Universidad de Alicante"},"hostedby":{"key":"10|opendoar____::e820a45f1dfc7b95282d10b6087e11c0","value":"Repositorio Institucional de la Universidad de Alicante"},"license":"Open Access This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article’s Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article’s Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. 
To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/.","publicationdate":"2018-09-26","refereed":"UNKNOWN","type":"Article","url":["http://hdl.handle.net/10045/80968"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::70703810b20723fe33cba6ffd7128212","value":"Scientific Reports"},"publicationdate":"2018-09-01","refereed":"UNKNOWN","type":"Article","url":["http://link.springer.com/article/10.1038/s41598-018-32131-9","https://doaj.org/toc/2045-2322"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de","value":"NARCIS"},"hostedby":{"key":"10|opendoar____::97275a23ca44226c9964043c8462be96","value":"KNAW Repository"},"publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Article","url":["https://pure.knaw.nl/portal/en/publications/4435769d-949e-46a6-81ad-ecf05afc9d8b"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de","value":"NARCIS"},"hostedby":{"key":"10|opendoar____::d709f38ef758b5066ef31b18039b8ce5","value":"Wageningen Yield"},"publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Article","url":["http://library.wur.nl/WebQuery/wurpubs/541852"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::70703810b20723fe33cba6ffd7128212","value":"Scientific 
Reports"},"license":"https://creativecommons.org/licenses/by/4.0","publicationdate":"2018-09-26","refereed":"UNKNOWN","type":"Article","url":["http://www.nature.com/articles/s41598-018-32131-9.pdf","http://www.nature.com/articles/s41598-018-32131-9","http://dx.doi.org/10.1038/s41598-018-32131-9"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::70703810b20723fe33cba6ffd7128212","value":"Scientific Reports"},"license":"cc-by","refereed":"UNKNOWN","type":"Article","url":["https://www.nature.com/articles/s41598-018-32131-9.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::70703810b20723fe33cba6ffd7128212","value":"Scientific Reports"},"refereed":"UNKNOWN","type":"Article","url":["https://www.nature.com/articles/s41598-018-32131-9","https://pure.knaw.nl/portal/en/publications/modulation-of-plant-mediated-interactions-between-herbivores-of-d","http://rua.ua.es/dspace/handle/10045/80968","https://www.narcis.nl/publication/RecordID/oai%3Apure.knaw.nl%3Apublications%2F4435769d-949e-46a6-81ad-ecf05afc9d8b","https://ui.adsabs.harvard.edu/abs/2018NatSR...814424V/abstract","http://rua.ua.es/dspace/bitstream/10045/80968/1/2018_Vaello_etal_SciRep.pdf","https://www.nature.com/articles/s41598-018-32131-9.pdf","https://academic.microsoft.com/#/detail/2889807185"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository"},"license":"http://creativecommons.org/licenses/by/4.0","publicationdate":"2018-09-26","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1038/s41598-018-32131-9"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Modulation of plant-mediated interactions between herbivores of different feeding guilds: Effects of parasitism and belowground interactions","originalId":["oai:rua.ua.es:10045/80968","oai:doaj.org/article:321ff5dd90ea4933b3fb2a41724400bf","knaw:oai:pure.knaw.nl:publications/4435769d-949e-46a6-81ad-ecf05afc9d8b","wur:oai:library.wur.nl:wurpubs/541852","32131","10.1038/s41598-018-32131-9","2889807185"],"pid":[{"scheme":"doi","value":"10.1038/s41598-018-32131-9"}],"projects":[{"code":"2300188287","funder":{"jurisdiction":"NL","name":"Netherlands Organisation for Scientific Research (NWO)","shortName":"NWO"},"id":"40|nwo_________::dc69ada721bf21ed51055b6421850d73","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Exploring and exploiting phytochemicals to lure hyperparasitoids away from beneficial biocontrol agents"},{"code":"2300189714","funder":{"jurisdiction":"NL","name":"Netherlands Organisation for Scientific Research (NWO)","shortName":"NWO"},"id":"40|nwo_________::39d892199c3b3d3b5ae9869a79e5bba8","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Linking aboveground-belowground interactions and plant-soil feedback to improve pest control and sustainability in greenhouse cut-flowers"}],"publicationdate":"2018-09-26","publisher":"Springer Nature","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Plant-mediated interactions"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Herbivores"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Feeding 
guilds"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Parasitism"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Belowground interactions"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Zoología"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Medicine"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"R"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Science"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Q"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"international"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Laboratorium voor Entomologie"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"PE&RC"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Laboratory of Entomology"}},{"subject":{"scheme":"keywords","value":"Multidisciplinary"}}],"type":"publication"} +{"author":[{"fullname":"Bonnie 
Berger","name":"Bonnie","pid":{"id":{"scheme":"orcid","value":"0000-0002-2724-7228"}},"rank":1,"surname":"Berger"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::958adb57686c2fdec5796398de5f317a","value":"MPG.PuRe"},{"key":"10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1","value":"bioRxiv"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::89f0fd5c927d466d6ec9a21b9ac34ffa","value":"eScholarship - University of California"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},{"key":"10|opendoar____::8f19793b2671094e63a15ab883d50137","value":"Digital Access to Scholarship at Harvard"},{"key":"10|opendoar____::2a38a4a9316c49e5a833517c45d31070","value":"DSpace@MIT"}],"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.8"}]}],"contributor":["Massachusetts Institute of Technology. Computer Science and Artificial Intelligence Laboratory","Massachusetts Institute of Technology. 
Department of Mathematics","Lipson, Mark","Loh, Po-Ru","Berger, Bonnie"],"country":[{"code":"DZ","label":"Algeria","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}},{"code":"US","label":"United States","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2020-09-12T07:56:34.334Z","description":["Austronesian languages are spread across half the globe, from Easter Island to Madagascar. Evidence from linguistics and archaeology indicates that the ‘Austronesian expansion,’ which began 4,000–5,000 years ago, likely had roots in Taiwan, but the ancestry of present-day Austronesian-speaking populations remains controversial. Here, we analyse genome-wide data from 56 populations using new methods for tracing ancestral gene flow, focusing primarily on Island Southeast Asia. We show that all sampled Austronesian groups harbour ancestry that is more closely related to aboriginal Taiwanese than to any present-day mainland population. Surprisingly, western Island Southeast Asian populations have also inherited ancestry from a source nested within the variation of present-day populations speaking Austro-Asiatic languages, which have historically been nearly exclusive to the mainland. Thus, either there was once a substantial Austro-Asiatic presence in Island Southeast Asia, or Austronesian speakers migrated to and through the mainland, admixing there before continuing to western Indonesia.","National Science Foundation (U.S.). Graduate Research Fellowship Program","Simons Foundation","National Institutes of Health (U.S.) (Grant R01GM108348)","National Institutes of Health (U.S.) 
(Training Grant 5T32HG004947-04)"],"format":["application/pdf"],"id":"50|dedup_wf_001::025dff86cc73c1c865ed35e5b7646e73","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::958adb57686c2fdec5796398de5f317a","value":"MPG.PuRe"},"hostedby":{"key":"10|opendoar____::958adb57686c2fdec5796398de5f317a","value":"MPG.PuRe"},"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","publicationdate":"2014-01-01","refereed":"UNKNOWN","type":"Article","url":["http://hdl.handle.net/11858/00-001M-0000-0024-1F46-B","http://hdl.handle.net/11858/00-001M-0000-0024-1F8F-7","http://hdl.handle.net/11858/00-001M-0000-0024-1F90-1"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1","value":"bioRxiv"},"hostedby":{"key":"10|share_______::bbd802baad85d1fd440f32a7a3a2c2b1","value":"bioRxiv"},"publicationdate":"2014-05-27","refereed":"nonPeerReviewed","type":"Preprint","url":["http://dx.doi.org/10.1101/005603"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository"},"publicationdate":"2014-05-27","refereed":"UNKNOWN","type":"Preprint","url":["https://syndication.highwire.org/content/doi/10.1101/005603","http://dx.doi.org/10.1101/005603"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"cc-by-nc-nd","refereed":"UNKNOWN","type":"Preprint","url":["https://www.biorxiv.org/content/early/2014/05/27/005603.full.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"refereed":"UNKNOWN","type":"Preprint","url":["https://www.biorxiv.org/content/biorxiv/early/2014/05/27/005603.full.pdf","http://www.biorxiv.org/content/biorxiv/early/2014/05/27/005603.full.pdf","https://www.biorxiv.org/content/10.1101/005603v1","https://academic.microsoft.com/#/detail/2951139191"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::d81a03edb7c2e81206ec44c3fb0a02f4","value":"Nature 
Communications"},"license":"https://creativecommons.org/licenses/by-nc-nd/4.0","publicationdate":"2014-08-19","refereed":"UNKNOWN","type":"Article","url":["http://www.nature.com/articles/ncomms5689.pdf","http://www.nature.com/articles/ncomms5689","http://dx.doi.org/10.1038/ncomms5689"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::d81a03edb7c2e81206ec44c3fb0a02f4","value":"Nature Communications"},"license":"cc-by-nc-nd","refereed":"UNKNOWN","type":"Article","url":["https://www.nature.com/articles/ncomms5689.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::d81a03edb7c2e81206ec44c3fb0a02f4","value":"Nature 
Communications"},"refereed":"UNKNOWN","type":"Article","url":["https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4143916","https://www.nature.com/articles/ncomms5689","http://ui.adsabs.harvard.edu/abs/2014NatCo...5.4689L/abstract","https://dash.harvard.edu/handle/1/12987285","http://genetics.med.harvard.edu/reichlab/Reich_Lab/Publications_files/8_19_Nature_Communications.pdf","https://escholarship.org/uc/item/5h84k727","http://europepmc.org/articles/PMC4143916","https://core.ac.uk/display/45957627","https://pure.mpg.de/pubman/item/item_2067756_5/component/file_2067755/Stoneking_Reconstructing_Nature_Comm_2014.pdf","http://pubman.mpdl.mpg.de/pubman/item/escidoc:2067756","http://qa-pubman.mpdl.mpg.de/pubman/item/escidoc:2067756","https://dash.harvard.edu/bitstream/handle/1/12987285/4143916.pdf?sequence=1","https://www.nature.com/articles/ncomms5689.pdf","http://genetics.med.harvard.edu/reich/Reich_Lab/Publications_files/8_19_Nature_Communications.pdf","https://academic.microsoft.com/#/detail/2144531681"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::89f0fd5c927d466d6ec9a21b9ac34ffa","value":"eScholarship - University of California"},"hostedby":{"key":"10|opendoar____::89f0fd5c927d466d6ec9a21b9ac34ffa","value":"eScholarship - University of California"},"publicationdate":"2014-08-19","refereed":"UNKNOWN","type":"Article","url":["https://escholarship.org/uc/item/5h84k727"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"https://creativecommons.org/licenses/by-nc-nd/4.0","publicationdate":"2014-08-19","refereed":"UNKNOWN","type":"Other literature 
type","url":["http://dx.doi.org/10.1038/ncomms5689"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"hostedby":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"publicationdate":"2014-08-01","refereed":"UNKNOWN","type":"Article","url":["http://europepmc.org/articles/PMC4143916"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::8f19793b2671094e63a15ab883d50137","value":"Digital Access to Scholarship at Harvard"},"hostedby":{"key":"10|opendoar____::8f19793b2671094e63a15ab883d50137","value":"Digital Access to Scholarship at Harvard"},"publicationdate":"2014-01-01","refereed":"UNKNOWN","type":"Article","url":["http://nrs.harvard.edu/urn-3:HUL.InstRepos:12987285"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::2a38a4a9316c49e5a833517c45d31070","value":"DSpace@MIT"},"hostedby":{"key":"10|opendoar____::2a38a4a9316c49e5a833517c45d31070","value":"DSpace@MIT"},"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","publicationdate":"2014-02-01","refereed":"UNKNOWN","type":"Article","url":["http://hdl.handle.net/1721.1/92809","https://orcid.org/0000-0002-2724-7228"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Reconstructing Austronesian population history in Island Southeast 
Asia","originalId":["oai:pure.mpg.de:item_2067756","","10.1101/005603","2951139191","BFncomms5689","10.1038/ncomms5689","2144531681","oai:escholarship.org/ark:/13030/qt5h84k727","oai:pubmedcentral.nih.gov:4143916","oai:dash.harvard.edu:1/12987285","oai:dspace.mit.edu:1721.1/92809"],"pid":[{"scheme":"doi","value":"10.1038/ncomms5689"},{"scheme":"doi","value":"10.1101/005603"},{"scheme":"pmc","value":"PMC4143916"},{"scheme":"pmid","value":"25137359"},{"scheme":"doi","value":"10.1038/ncomms5689."}],"projects":[{"code":"1R01GM100233-01","funder":{"fundingStream":"NATIONAL INSTITUTE OF GENERAL MEDICAL SCIENCES","jurisdiction":"US","name":"National Institutes of Health","shortName":"NIH"},"id":"40|nih_________::031bb5f2f70239b3210eda38b2079f67","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Population mixture in evolutionary and medical genetics"},{"code":"2R01GM108348-04A1","funder":{"fundingStream":"NATIONAL INSTITUTE OF GENERAL MEDICAL SCIENCES","jurisdiction":"US","name":"National Institutes of Health","shortName":"NIH"},"id":"40|nih_________::0ce7c76e7717bb6d862673806b7dda80","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Compressive Genomics for Large Omics Data Sets: Algorithms, Applications and Tools"},{"code":"5T32HG004947-04","funder":{"fundingStream":"NATIONAL HUMAN GENOME RESEARCH INSTITUTE","jurisdiction":"US","name":"National Institutes of Health","shortName":"NIH"},"id":"40|nih_________::15668ea4eb881fd2a645ce433189c37f","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"MIT/Whitehead/Broad Computational Genetics Training Program"},{"code":"1032255","funder":{"fundingStream":"Directorate for Social, Behavioral & Economic Sciences","jurisdiction":"US","name":"National Science Foundation","shortName":"NSF"},"id":"40|nsf_________::75f0d51d84ad80ab43c5ceac41a24c58","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"A new history and 
geography of human genes informed by ancient DNA"}],"publicationdate":"2014-08-19","subjects":[{"subject":{"scheme":"keywords","value":"General Biochemistry, Genetics and Molecular Biology"}},{"subject":{"scheme":"keywords","value":"General Physics and Astronomy"}},{"subject":{"scheme":"keywords","value":"General Chemistry"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Humans"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Sequence Analysis"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"DNA"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Language"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Emigration and Immigration"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Phylogeny"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Gene Frequency"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Genotype"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Haplotypes"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Linkage Disequilibrium"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Polymorphism"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Single Nucleotide"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Geography"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Software"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Asian Continental Ancestry 
Group"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Ethnic Groups"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Asia"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Southeastern"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Indonesia"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Taiwan"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Gene Flow"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Genetic Variation"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Genome-Wide Association Study"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Islands"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Article"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.891"},"subject":{"scheme":"mesheuropmc","value":"humanities"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7803"},"subject":{"scheme":"mesheuropmc","value":"parasitic diseases"}}],"type":"publication"} +{"author":[{"fullname":"Anna Zsigmond","name":"Anna","pid":{"id":{"scheme":"orcid","value":"0000-0002-3368-8863"}},"rank":1,"surname":"Zsigmond"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::666894f04637d4dd1861ad117e1c4a66","value":"SNSF P3 Database"},{"key":"10|opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb","value":"CERN Document 
Server"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"context":[{"code":"egi","label":"EGI Federation","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"2017-10-27T10:13:31.262Z","description":["Abstract The electroweak boson production is an important benchmark measurement in ultra-relativistic heavy-ion collisions which can provide constraints on the nuclear parton distribution functions. In this paper the first results from the proton–lead collision data taken in early 2013 are presented. The Z boson production cross section is measured in the muon decay channel in bins of transverse momentum and rapidity together with the forward–backward ratio. The W production is studied in the muon and electron decay channels and the differential cross sections, lepton-charge and forward–backward asymmetries are computed as a function of the lepton pseudorapidity. 
All results are compared with theory predictions with and without nuclear modification of the parton distribution functions showing hints of nuclear effects."],"format":[],"id":"50|dedup_wf_001::02cb7e7397a1f775e53ed38b38aba680","instance":[{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::666894f04637d4dd1861ad117e1c4a66","value":"SNSF P3 Database"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2014-01-01","refereed":"peerReviewed","type":"Conference object","url":["http://dx.doi.org/doi:10.1016/j.nuclphysa.2014.07.039"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb","value":"CERN Document Server"},"hostedby":{"key":"10|opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb","value":"CERN Document Server"},"publicationdate":"2014-07-01","refereed":"UNKNOWN","type":"Other literature type","url":["http://cds.cern.ch/record/1746343"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::430d7bc82c281348c595f0aba4ad70f2","value":"Nuclear Physics 
A"},"license":"https://www.elsevier.com/tdm/userlicense/1.0/","publicationdate":"2014-08-04T16:17:22Z","refereed":"UNKNOWN","type":"Article","url":["https://api.elsevier.com/content/article/PII:S0375947414002401?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S0375947414002401?httpAccept=text/plain","http://dx.doi.org/10.1016/j.nuclphysa.2014.07.039"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|issn___print::430d7bc82c281348c595f0aba4ad70f2","value":"Nuclear Physics A"},"refereed":"UNKNOWN","type":"Article","url":["http://cds.cern.ch/record/1746343/files/CR2014_136.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::430d7bc82c281348c595f0aba4ad70f2","value":"Nuclear Physics A"},"refereed":"UNKNOWN","type":"Article","url":["https://ui.adsabs.harvard.edu/abs/2014NuPhA.931..718Z/abstract","https://www.sciencedirect.com/science/article/pii/S0375947414002401","https://academic.microsoft.com/#/detail/2009363498"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1603727328518,"maintitle":"Z and W boson production in pPb collisions with CMS","originalId":["","oai:cds.cern.ch:1746343","S0375947414002401","10.1016/j.nuclphysa.2014.07.039","2009363498"],"pid":[{"scheme":"doi","value":"doi:10.1016/j.nuclphysa.2014.07.039"},{"scheme":"doi","value":"10.1016/j.nuclphysa.2014.07.039"}],"projects":[{"code":"IZ73Z0_152601","funder":{"fundingStream":"Programmes","jurisdiction":"CH","name":"Swiss National Science Foundation","shortName":"SNSF"},"id":"40|snsf________::549ac0283d2cd3c4ef08792680f8c8c2","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Preparation for and exploitation of the CMS data taking at the 
next LHC run"}],"publicationdate":"2014-07-01","publisher":"Elsevier,Netherlands","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Detectors and Experimental Techniques"}},{"subject":{"scheme":"keywords","value":"Nuclear and High Energy Physics"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.8784"},"subject":{"scheme":"arxiv","value":"High Energy Physics::Experiment"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.8694"},"subject":{"scheme":"arxiv","value":"Nuclear Experiment"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7965"},"subject":{"scheme":"arxiv","value":"High Energy Physics::Phenomenology"}}],"type":"publication"} +{"author":[{"fullname":"Srivas Chennu","name":"Srivas","pid":{"id":{"scheme":"orcid","value":"0000-0002-6840-2941"}},"rank":1,"surname":"Chennu"},{"fullname":"Jeffrey Malins","name":"Jeffrey","pid":{"id":{"scheme":"orcid","value":"0000-0002-2932-256X"}},"rank":2,"surname":"Malins"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73","value":"CORE (RIOXX-UK Aggregator)"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"container":{"ep":"799","issnPrinted":"2213-1582","name":"NeuroImage: Clinical","sp":"788","vol":"4"},"context":[{"code":"ni","label":"Neuroinformatics","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.8"}]}],"contributor":[],"country":[{"code":"GB","label":"United Kingdom","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2020-08-19T07:40:06Z","description":["Functional neuroimaging assessments of residual cognitive capacities, including those that support language, can improve diagnostic and prognostic accuracy in patients with disorders of consciousness. Due to the portability and relative inexpensiveness of electroencephalography, the N400 event-related potential component has been proposed as a clinically valid means to identify preserved linguistic function in non-communicative patients. Across three experiments, we show that changes in both stimuli and task demands significantly influence the probability of detecting statistically significant N400 effects — that is, the difference in N400 amplitudes caused by the experimental manipulation. In terms of task demands, passively heard linguistic stimuli were significantly less likely to elicit N400 effects than task-relevant stimuli. Due to the inability of the majority of patients with disorders of consciousness to follow task commands, the insensitivity of passive listening would impede the identification of residual language abilities even when such abilities exist. In terms of stimuli, passively heard normatively associated word pairs produced the highest detection rate of N400 effects (50% of the participants), compared with semantically-similar word pairs (0%) and high-cloze sentences (17%). 
This result is consistent with a prediction error account of N400 magnitude, with highly predictable targets leading to smaller N400 waves, and therefore larger N400 effects. Overall, our data indicate that non-repeating normatively associated word pairs provide the highest probability of detecting single-subject N400s during passive listening, and may thereby provide a clinically viable means of assessing residual linguistic function. We also show that more liberal analyses may further increase the detection-rate, but at the potential cost of increased false alarms.","Highlights • The N400 is a candidate marker of linguistic function after severe brain injury. • The probability of detecting N400s is dependent on task demands. • Passive listening is less sensitive than command-following. • The probability of detecting N400s is dependent on stimuli choices. • Word-pairs generated from association norms provide the highest sensitivity."],"format":["application/pdf"],"id":"50|dedup_wf_001::03d1b1b04480058e646b34c174e61082","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a","value":"NeuroImage: 
Clinical"},"license":"https://www.elsevier.com/tdm/userlicense/1.0/","publicationdate":"2014-05-09T19:02:55Z","refereed":"UNKNOWN","type":"Article","url":["https://api.elsevier.com/content/article/PII:S2213158214000576?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S2213158214000576?httpAccept=text/plain","http://dx.doi.org/10.1016/j.nicl.2014.05.001"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a","value":"NeuroImage: Clinical"},"license":"cc-by-nc-nd","refereed":"UNKNOWN","type":"Article","url":["https://doi.org/10.1016/j.nicl.2014.05.001"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a","value":"NeuroImage: 
Clinical"},"refereed":"UNKNOWN","type":"Article","url":["https://www.sciencedirect.com/science/article/pii/S2213158214000576","https://www.uwo.ca/bmi/owenlab/pdf/2014%20-%20Cruse%20-%20NeuroimageClinical.pdf","http://europepmc.org/articles/PMC4055893","https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4055893/","https://ir.lib.uwo.ca/psychologypub/113/","https://core.ac.uk/display/88054806","http://europepmc.org/abstract/MED/24936429","https://ir.lib.uwo.ca/cgi/viewcontent.cgi?article=1121&context=psychologypub","https://kar.kent.ac.uk/54635/","http://www.sciencedirect.com/science/article/pii/S2213158214000576","https://academic.microsoft.com/#/detail/1966079396"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73","value":"CORE (RIOXX-UK Aggregator)"},"hostedby":{"key":"10|openaire____::8f87e10869299a5fe80b315695296b88","value":"Elsevier"},"refereed":"UNKNOWN","type":"Article","url":["http://dx.doi.org/10.1016/j.nicl.2014.05.001"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a","value":"NeuroImage: Clinical"},"publicationdate":"2014-01-01","refereed":"UNKNOWN","type":"Article","url":["http://www.sciencedirect.com/science/article/pii/S2213158214000576","https://doaj.org/toc/2213-1582"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"hostedby":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed 
Central"},"publicationdate":"2014-05-01","refereed":"UNKNOWN","type":"Article","url":["http://europepmc.org/articles/PMC4055893"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73","value":"CORE (RIOXX-UK Aggregator)"},"hostedby":{"key":"10|opendoar____::4c22bd444899d3b6047a10b20a2f26db","value":"Kent Academic Repository"},"license":"http://creativecommons.org/licenses/by-nc-nd/3.0","publicationdate":"2014-05-09","refereed":"UNKNOWN","type":"Article","url":["https://kar.kent.ac.uk/54635/2/Cruse-2014.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"hostedby":{"key":"10|doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a","value":"NeuroImage: Clinical"},"license":"http://creativecommons.org/licenses/by-nc-nd/3.0/","publicationdate":"2014-01-01","refereed":"UNKNOWN","type":"Article","url":["http://dx.doi.org/10.1016/j.nicl.2014.05.001"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"http://creativecommons.org/licenses/by-nc-nd/3.0","publicationdate":"2014-01-01","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1016/j.nicl.2014.05.001"]}],"language":{"code":"und","label":"Undetermined"},"lastupdatetimestamp":1603727328518,"maintitle":"The reliability of the N400 in single subjects: Implications for patients with disorders of 
consciousness","originalId":["S2213158214000576","10.1016/j.nicl.2014.05.001","1966079396","","oai:doaj.org/article:7ea179bda929489e9d83ce74e7835d5c","oai:pubmedcentral.nih.gov:4055893","oai:kar.kent.ac.uk:54635"],"pid":[{"scheme":"doi","value":"10.1016/j.nicl.2014.05.001"},{"scheme":"pmc","value":"PMC4055893"},{"scheme":"pmid","value":"24936429"}],"projects":[{"code":"unidentified","funder":{"jurisdiction":"CA","name":"Canadian Institutes of Health Research","shortName":"CIHR"},"id":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"unidentified"}],"publicationdate":"2014-01-01","publisher":"Elsevier BV","subjects":[{"subject":{"scheme":"keywords","value":"Cognitive Neuroscience"}},{"subject":{"scheme":"keywords","value":"Radiology Nuclear Medicine and imaging"}},{"subject":{"scheme":"keywords","value":"Neurology"}},{"subject":{"scheme":"keywords","value":"Clinical Neurology"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Vegetative state"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Minimally conscious state"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"N400"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Sensitivity"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Language"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Computer applications to medicine. Medical informatics"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"R858-859.7"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Neurology. 
Diseases of the nervous system"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"RC346-429"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Article"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"BF"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7803"},"subject":{"scheme":"mesheuropmc","value":"behavioral disciplines and activities"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7461"},"subject":{"scheme":"mesheuropmc","value":"psychological phenomena and processes"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.7371"},"subject":{"scheme":"mesheuropmc","value":"genetic structures"}}],"type":"publication"} +{"author":[{"fullname":"José J. Fernandez","name":"JOSE JAVIER","pid":{"id":{"scheme":"orcid","value":"0000-0002-0805-8317"}},"rank":1,"surname":"FERNANDEZ"},{"fullname":"Antonio null","name":"Antonio","pid":{"id":{"scheme":"orcid","value":"0000-0001-8376-7941"}},"rank":2},{"fullname":"Janny Alexander Villa-Pulgarin","name":"Janny Alexander","pid":{"id":{"scheme":"orcid","value":"0000-0002-9916-5977"}},"rank":3,"surname":"Villa-Pulgarin"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::4e99a7004945b3f6298b66c2b8dd4fc6","value":"SESAM Publication Database - FP7 KBBE"},{"key":"10|opendoar____::c9f95a0a5af052bffce5c89917335f67","value":"Digital.CSIC"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},{"key":"10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311","value":"Multidisciplinary Digital Publishing Institute"},{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed 
Central"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"}],"container":{"edition":"","ep":"2235","iss":"12","issnLinking":"","issnOnline":"1660-3397","issnPrinted":"","name":"Marine Drugs","sp":"2220","vol":"9"},"context":[{"code":"mes","label":"European Marine Science","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":["Ministerio de Ciencia e Innovación (España)","Agencia Canaria de Investigación, Innovación y Sociedad de la Información","Red Temática de Investigación Cooperativa en Cáncer (España)","Instituto de Salud Carlos III","Junta de Castilla y León"],"country":[{"code":"ES","label":"Spain","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2015-07-09T12:17:51.487Z","description":["This article is an open access article distributed under the terms and conditions of the Creative Commons Attribution license.-- et al.","The red seaweed Laurencia viridis is a rich source of secondary metabolites derived from squalene. New polyethers, such as iubol (2), 22-hydroxy-15(28)- dehydrovenustatriol (3), 1,2-dehydropseudodehydrothyrsiferol (4), and secodehydrothyrsiferol (5) have been isolated and characterized from this alga. The structures were determined through the interpretation of NMR spectroscopic data and the relative configuration was proposed on the basis of NOESY spectrum and biogenetic considerations. All new compounds exhibited significant cytotoxic activity against a panel of cancer cell lines. 
© 2011 by the authors.","Financial support was provided by the grants MAREX (FP 7; KBBE-3-245137) (EU), CTQ2008-06754-C04-01 and SAF2008-02251 (MICINN); C2008000145 and EXMAR (ACISII); RD06/0020/1037 from Red Temática de Investigación Cooperativa en Cáncer, Instituto de Salud Carlos III, co-funded by the Fondo Europeo de Desarrollo Regional of the European Union, and Junta de Castilla y León (CSI052A11-2, and GR15-Experimental Therapeutics and Translational Oncology Program). F.C.P. is the recipient of a SEGAI-ULL fellowship.","Peer Reviewed"],"format":["application/pdf"],"id":"50|dedup_wf_001::06496bbae2934f9443037f3df7293791","instance":[{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::4e99a7004945b3f6298b66c2b8dd4fc6","value":"SESAM Publication Database - FP7 KBBE"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2011-01-01","refereed":"UNKNOWN","type":"Article","url":["http://dx.doi.org/10.3390/md9112220"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::c9f95a0a5af052bffce5c89917335f67","value":"Digital.CSIC"},"hostedby":{"key":"10|opendoar____::c9f95a0a5af052bffce5c89917335f67","value":"Digital.CSIC"},"publicationdate":"2011-01-01","refereed":"peerReviewed","type":"Article","url":["http://hdl.handle.net/10261/61853"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::76bf43665a8619a49db220f559111931","value":"Marine 
Drugs"},"publicationdate":"2011-11-01","refereed":"UNKNOWN","type":"Article","url":["http://www.mdpi.com/1660-3397/9/11/2220/","https://doaj.org/toc/1660-3397"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311","value":"Multidisciplinary Digital Publishing Institute"},"hostedby":{"key":"10|doajarticles::76bf43665a8619a49db220f559111931","value":"Marine Drugs"},"license":"http://creativecommons.org/licenses/by/3.0/","publicationdate":"2011-11-07","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.3390/md9112220"]},{"collectedfrom":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2011-11-07T16:36:28Z","refereed":"UNKNOWN","type":"UNKNOWN","url":["https://dx.doi.org/10.3390/md9112220"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::c9f95a0a5af052bffce5c89917335f67","value":"Digital.CSIC"},"hostedby":{"key":"10|opendoar____::c9f95a0a5af052bffce5c89917335f67","value":"Digital.CSIC"},"license":"https://creativecommons.org/licenses/by/4.0/","publicationdate":"2011-11-07","refereed":"peerReviewed","type":"Article","url":["http://hdl.handle.net/10261/213287"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::76bf43665a8619a49db220f559111931","value":"Marine 
Drugs"},"license":"https://creativecommons.org/licenses/by/4.0/","publicationdate":"2011-11-07","refereed":"UNKNOWN","type":"Article","url":["http://www.mdpi.com/1660-3397/9/11/2220/pdf","http://dx.doi.org/10.3390/md9112220"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::76bf43665a8619a49db220f559111931","value":"Marine Drugs"},"license":"cc-by","refereed":"UNKNOWN","type":"Article","url":["https://www.mdpi.com/1660-3397/9/11/2220/pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::76bf43665a8619a49db220f559111931","value":"Marine Drugs"},"refereed":"UNKNOWN","type":"Article","url":["https://www.mdpi.com/1660-3397/9/11/2220/htm","https://www.mdpi.com/1660-3397/9/11/2220/pdf","https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3229232/","http://digital.csic.es/bitstream/10261/61853/1/New%20Polyether.pdf","http://europepmc.org/articles/PMC3229232","https://digital.csic.es/handle/10261/61853","http://digital.csic.es/handle/10261/61853","https://www.researchgate.net/profile/Jose_Fernandez25/publication/51872660_New_Polyether_Triterpenoids_from_Laurencia_viridis_and_Their_Biological_Evaluation/links/0deec532aadcb52043000000.pdf?disableCoverPage=true","https://www.scienceopen.com/document/vid/df8bd74c-d6fa-41bf-ac38-c492ce9d7987","https://academic.microsoft.com/#/detail/2141937370"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed 
Central"},"hostedby":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"publicationdate":"2011-11-01","refereed":"UNKNOWN","type":"Article","url":["http://europepmc.org/articles/PMC3229232"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"","publicationdate":"2011-01-01","refereed":"UNKNOWN","type":"Article","url":["http://dx.doi.org/10.3390/md9112220"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"New Polyether Triterpenoids from Laurencia viridis and Their Biological Evaluation","originalId":["","oai:digital.csic.es:10261/61853","oai:doaj.org/article:cd1affc5c8fb4c428b1131e80e8c4f5e","oai:mdpi.com:/1660-3397/9/11/2220/","22163183","10.3390/md9112220","oai:digital.csic.es:10261/213287","md9112220","2141937370","oai:pubmedcentral.nih.gov:3229232","WOS:000298929200005"],"pid":[{"scheme":"doi","value":"10.3390/md9112220"},{"scheme":"handle","value":"10261/61853"},{"scheme":"pmid","value":"22163183"},{"scheme":"handle","value":"10261/213287"},{"scheme":"pmc","value":"PMC3229232"}],"projects":[{"acronym":"MAREX","code":"245137","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::b06838cf21feb2e82bca4bf4782e90bb","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Exploring Marine Resources for Bioactive Compounds: From Discovery to Sustainable Production and Industrial Applications"}],"publicationdate":"2011-11-07","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Laurencia 
viridis"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"squalene"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"polyethers"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"cytotoxic activity"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Biology (General)"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"QH301-705.5"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Laurencia viridis"}},{"subject":{"scheme":"keywords","value":"Drug Discovery"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Article"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.72"},"subject":{"scheme":"ACM","value":"ComputingMilieux_LEGALASPECTSOFCOMPUTING"}}],"type":"publication"} +{"author":[{"fullname":"Andrea Danese","name":"Andrea","pid":{"id":{"scheme":"orcid","value":"0000-0001-8718-5412"}},"rank":1,"surname":"Danese"},{"fullname":"Helen Fisher","name":"Helen","pid":{"id":{"scheme":"orcid","value":"0000-0003-4174-2126"}},"rank":2,"surname":"Fisher"},{"fullname":"Terrie Edith Moffitt","name":"Terrie","pid":{"id":{"scheme":"orcid","value":"0000-0002-8589-6760"}},"rank":3,"surname":"Moffitt"},{"fullname":"Jessie Baldwin","name":"Jessie","pid":{"id":{"scheme":"orcid","value":"0000-0002-5703-5058"}},"rank":4,"surname":"Baldwin"},{"fullname":"Louise Arseneault","name":"Louise","pid":{"id":{"scheme":"orcid","value":"0000-0002-2938-2191"}},"rank":5,"surname":"Arseneault"},{"fullname":"Joanne Newbury","name":"Joanne","pid":{"id":{"scheme":"orcid","value":"0000-0002-4080-4033"}},"rank":6,"surname":"Newbury"},{"fullname":"Avshalom 
Caspi","name":"avshalom","pid":{"id":{"scheme":"orcid","value":"0000-0003-0082-4600"}},"rank":7,"surname":"caspi"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::566a9968b43628588e76be5a85a0f9e8","value":"King's Research Portal"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::5737c6ec2e0716f3d8a7a5c4e0de0d9a","value":"UCL Discovery"},{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.8"}]}],"contributor":[],"country":[{"code":"GB","label":"United Kingdom","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2020-10-19T09:59:25.763Z","description":["Abstract Both prospective informant-reports and retrospective self-reports may be used to measure childhood maltreatment, though both methods entail potential limitations such as underestimation and memory biases. The validity and utility of standard measures of childhood maltreatment requires clarification in order to inform the design of future studies investigating the mental health consequences of maltreatment. 
The present study assessed agreement between prospective informant-reports and retrospective self-reports of childhood maltreatment, as well as the comparative utility of both reports for predicting a range of psychiatric problems at age 18. Data were obtained from the Environmental Risk (E-Risk) Longitudinal Twin Study, a nationally-representative birth cohort of 2232 children followed to 18 years of age (with 93% retention). Childhood maltreatment was assessed in two ways: (i) prospective informant-reports from caregivers, researchers, and clinicians when children were aged 5, 7, 10 and 12; and (ii) retrospective self-reports of maltreatment experiences occurring up to age 12, obtained at age 18 using the Childhood Trauma Questionnaire. Participants were privately interviewed at age 18 concerning several psychiatric problems including depression, anxiety, self-injury, alcohol/cannabis dependence, and conduct disorder. There was only slight to fair agreement between prospective and retrospective reports of childhood maltreatment (all Kappa's ≤ 0.31). Both prospective and retrospective reports of maltreatment were associated with age-18 psychiatric problems, though the strongest associations were found when maltreatment was retrospectively self-reported. These findings indicate that prospective and retrospective reports of childhood maltreatment capture largely non-overlapping groups of individuals. 
Young adults who recall being maltreated have a particularly elevated risk for psychopathology."],"format":["application/pdf","text"],"id":"50|dedup_wf_001::085f9efc74e1192abca87031fb573d70","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::566a9968b43628588e76be5a85a0f9e8","value":"King's Research Portal"},"hostedby":{"key":"10|opendoar____::566a9968b43628588e76be5a85a0f9e8","value":"King's Research Portal"},"publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Article","url":["https://kclpure.kcl.ac.uk/portal/en/publications/measuring-childhood-maltreatment-to-predict-earlyadult-psychopathology(6b9fcf93-2732-49fc-9f0d-d6e398c56883).html","https://doi.org/10.1016/j.jpsychires.2017.09.020","https://kclpure.kcl.ac.uk/ws/files/92745969/Measuring_childhood_maltreatment_to_NEWBURY_Firstonline20September2017_GOLD_VoR_CC_BY_.pdf"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::54ae940914489684875339c973ee4591","value":"Journal of Psychiatric 
Research"},"license":"https://www.elsevier.com/tdm/userlicense/1.0/","publicationdate":"2017-09-21T17:47:59Z","refereed":"UNKNOWN","type":"Article","url":["https://api.elsevier.com/content/article/PII:S0022395617307975?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S0022395617307975?httpAccept=text/plain","http://dx.doi.org/10.1016/j.jpsychires.2017.09.020"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|issn___print::54ae940914489684875339c973ee4591","value":"Journal of Psychiatric Research"},"license":"cc-by","refereed":"UNKNOWN","type":"Article","url":["https://doi.org/10.1016/j.jpsychires.2017.09.020"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::54ae940914489684875339c973ee4591","value":"Journal of Psychiatric Research"},"refereed":"UNKNOWN","type":"Article","url":["https://www.sciencedirect.com/science/article/pii/S0022395617307975","https://www.ncbi.nlm.nih.gov/pubmed/28965006","https://kclpure.kcl.ac.uk/portal/en/publications/measuring-childhood-maltreatment-to-predict-earlyadult-psychopathology(6b9fcf93-2732-49fc-9f0d-d6e398c56883).html","https://core.ac.uk/display/96762327","https://kclpure.kcl.ac.uk/portal/files/92745969/Measuring_childhood_maltreatment_to_NEWBURY_Firstonline20September2017_GOLD_VoR_CC_BY_.pdf","https://academic.microsoft.com/#/detail/2760657291"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::5737c6ec2e0716f3d8a7a5c4e0de0d9a","value":"UCL Discovery"},"hostedby":{"key":"10|opendoar____::5737c6ec2e0716f3d8a7a5c4e0de0d9a","value":"UCL 
Discovery"},"publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Article","url":["https://discovery.ucl.ac.uk/id/eprint/10075526/","https://discovery.ucl.ac.uk/id/eprint/10075526/1/1-s2.0-S0022395617307975-main.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357","value":"PubMed Central"},"hostedby":{"key":"10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c","value":"Europe PubMed Central"},"publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Article","url":["http://europepmc.org/articles/PMC5725307"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"http://creativecommons.org/licenses/by/4.0","publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1016/j.jpsychires.2017.09.020"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Measuring childhood maltreatment to predict early-adult psychopathology:Comparison of prospective informant-reports and retrospective self-reports","originalId":["oai:pure.atira.dk:publications/6b9fcf93-2732-49fc-9f0d-d6e398c56883","S0022395617307975","10.1016/j.jpsychires.2017.09.020","2760657291","oai:eprints.ucl.ac.uk.OAI2:10075526","oai:pubmedcentral.nih.gov:5725307"],"pid":[{"scheme":"doi","value":"10.1016/j.jpsychires.2017.09.020"},{"scheme":"pmc","value":"PMC5725307"},{"scheme":"pmid","value":"28965006"}],"projects":[{"code":"5R01HD077482-05","funder":{"fundingStream":"EUNICE KENNEDY SHRIVER NATIONAL INSTITUTE OF CHILD HEALTH & HUMAN DEVELOPMENT","jurisdiction":"US","name":"National Institutes of 
Health","shortName":"NIH"},"id":"40|nih_________::0a44d4e82d5b835e3512ee3e9aed50ed","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Neuropsychological and genomic signatures of violence exposure in childhood"},{"code":"1002190","funder":{"fundingStream":"Research Fellowships","jurisdiction":"AU","name":"National Health and Medical Research Council (NHMRC)","shortName":"NHMRC"},"id":"40|nhmrc_______::2f73997cd5fd2ca34b270952973df14d","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Physiology and pathophysiology of disorders of the musculoskelatal system"}],"publicationdate":"2018-01-01","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Child abuse, Mental health, Adolescence, Early adulthood, Assessment, Recall bias"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Article"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Child abuse"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Mental health"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Adolescence"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Early adulthood"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Assessment"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Recall bias"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.72"},"subject":{"scheme":"mesheuropmc","value":"humanities"}}],"type":"publication"} +{"author":[{"fullname":"Mara Novero","name":"Mara","pid":{"id":{"scheme":"orcid","value":"0000-0001-7412-8750"}},"rank":1,"surname":"Novero"},{"fullname":"Paola 
Bonfante","name":"Paola","pid":{"id":{"scheme":"orcid","value":"0000-0003-3576-8530"}},"rank":2,"surname":"Bonfante"},{"fullname":"Matteo Chialva","name":"Matteo","pid":{"id":{"scheme":"orcid","value":"0000-0002-6996-6642"}},"rank":3,"surname":"Chialva"},{"fullname":"Silvia PEROTTO","name":"Silvia","pid":{"id":{"scheme":"orcid","value":"0000-0003-0121-1806"}},"rank":4,"surname":"PEROTTO"},{"fullname":"Marco Chiapello","name":"Marco","pid":{"id":{"scheme":"orcid","value":"0000-0001-7768-3047"}},"rank":5,"surname":"Chiapello"},{"fullname":"Stefano Ghignone","name":"Stefano","pid":{"id":{"scheme":"orcid","value":"0000-0002-2033-2286"}},"rank":6,"surname":"Ghignone"},{"fullname":"Stefania DAGHINO","name":"Stefania","pid":{"id":{"scheme":"orcid","value":"0000-0001-5722-1558"}},"rank":7,"surname":"DAGHINO"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::89fcd07f20b6785b92134bd6c1d0fa42","value":"Archivio Istituzionale"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"container":{"ep":"1308","issnPrinted":"0028-646X","name":"New Phytologist","sp":"1296","vol":"220"},"context":[{"code":"sdsn-gr","label":"SDSN - Greece","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":[],"country":[{"code":"IT","label":"Italy","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2019-12-19T10:23:19Z","description":["Summary\r\nSeveral studies have 
investigated soil microbial biodiversity, but understanding of the mechanisms underlying plant responses to soil microbiota remains in its infancy. Here, we focused on tomato (Solanum lycopersicum), testing the hypothesis that plants grown on native soils display different responses to soil microbiotas.\r\nUsing transcriptomics, proteomics, and biochemistry, we describe the responses of two tomato genotypes (susceptible or resistant to Fusarium oxysporum f. sp. lycopersici) grown on an artificial growth substrate and two native soils (conducive and suppressive to Fusarium).\r\nNative soils affected tomato responses by modulating pathways involved in responses to oxidative stress, phenol biosynthesis, lignin deposition, and innate immunity, particularly in the suppressive soil. In tomato plants grown on steam-disinfected soils, total phenols and lignin decreased significantly. The inoculation of a mycorrhizal fungus partly rescued this response locally and systemically. Plants inoculated with the fungal pathogen showed reduced disease symptoms in the resistant genotype in both soils, but the susceptible genotype was partially protected from the pathogen only when grown on the suppressive soil.\r\nThe ‘state of alert’ detected in tomatoes reveals novel mechanisms operating in plants in native soils and the soil microbiota appears to be one of the drivers of these plant responses."],"format":[],"id":"50|dedup_wf_001::09e54cbad6670d8f40a7de2704e2d2a4","instance":[{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::35ee75a5ad42581d604be113a8f56427","value":"New 
Phytologist"},"license":"http://doi.wiley.com/10.1002/tdm_license_1.1","publicationdate":"2018-02-09","refereed":"UNKNOWN","type":"Article","url":["https://api.wiley.com/onlinelibrary/tdm/v1/articles/10.1111%2Fnph.15014","http://onlinelibrary.wiley.com/wol1/doi/10.1111/nph.15014/fullpdf","http://dx.doi.org/10.1111/nph.15014"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|issn___print::35ee75a5ad42581d604be113a8f56427","value":"New Phytologist"},"refereed":"UNKNOWN","type":"Article","url":["https://nph.onlinelibrary.wiley.com/doi/pdfdirect/10.1111/nph.15014"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::35ee75a5ad42581d604be113a8f56427","value":"New Phytologist"},"refereed":"UNKNOWN","type":"Article","url":["https://nph.onlinelibrary.wiley.com/doi/full/10.1111/nph.15014","https://nph.onlinelibrary.wiley.com/doi/pdf/10.1111/nph.15014","http://onlinelibrary.wiley.com/doi/10.1111/nph.15014/abstract","https://aperto.unito.it/handle/2318/1660820","https://academic.microsoft.com/#/detail/2793208206"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::89fcd07f20b6785b92134bd6c1d0fa42","value":"Archivio Istituzionale"},"hostedby":{"key":"10|opendoar____::89fcd07f20b6785b92134bd6c1d0fa42","value":"Archivio Istituzionale"},"publicationdate":"2018-01-01","refereed":"UNKNOWN","type":"Article","url":["http://hdl.handle.net/2318/1660820"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository"},"publicationdate":"2018-12-01","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.1111/nph.15014"]}],"language":{"code":"und","label":"Undetermined"},"lastupdatetimestamp":1603727328518,"maintitle":"Native soils with their microbiotas elicit a state of alert in tomato plants","originalId":["10.1111/nph.15014","2793208206","oai:iris.unito.it:2318/1660820"],"pid":[{"scheme":"doi","value":"10.1111/nph.15014"},{"scheme":"pmid","value":"29424928"}],"projects":[{"acronym":"TomRes","code":"727929","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda__h2020::94c4a066401e22002c4811a301bb4655","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"A NOVEL AND INTEGRATED APPROACH TO INCREASE MULTIPLE AND COMBINED STRESS TOLERANCE IN PLANTS USING TOMATO AS A MODEL"}],"publicationdate":"2018-12-01","publisher":"Wiley","subjects":[{"subject":{"scheme":"keywords","value":"Plant Science"}},{"subject":{"scheme":"keywords","value":"Physiology"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Arbuscular mycorrhizal fungi; Defence responses; Lignin biosynthesis; Microbiota; Suppressive and conducive soils; Susceptible and resistant genotypes; Tomato; Physiology; Plant Science"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.891"},"subject":{"scheme":"mesheuropmc","value":"food and beverages"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.891"},"subject":{"scheme":"mesheuropmc","value":"fungi"}},{"provenance":{"provenance":"Inferred by OpenAIRE","trust":"0.8739"},"subject":{"scheme":"mesheuropmc","value":"complex mixtures"}}],"type":"publication"} +{"author":[{"fullname":"K.P. Moustris","name":"K. P.","rank":1,"surname":"Moustris"},{"fullname":"D. Zafirakis","name":"D.","rank":2,"surname":"Zafirakis"},{"fullname":"K.A. Kavvadias","name":"K. 
A.","rank":3,"surname":"Kavvadias"},{"fullname":"J.K. Kaldellis","name":"J. K.","rank":4,"surname":"Kaldellis"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"container":{"name":"Mediterranean Conference on Power Generation, Transmission, Distribution and Energy Conversion (MedPower 2016)"},"context":[{"code":"sdsn-gr","label":"SDSN - Greece","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"2020-02-01T05:10:47Z","description":["One of the main parameters affecting the reliability of the renewable energy sources (RES) system, compared to the local conventional power station, is the ability to forecast the RES availability for a few hours ahead. To this end, the main objective of this work is the prognosis of the mean, maximum and minimum hourly wind power (WP) 8hours ahead. For this purpose, Artificial Neural Networks (ANN) modeling is applied. For the appropriate training of the developed ANN models hourly meteorological data are used. These data have been recorded by a meteorological mast in Tilos Island, Greece.\n\nFor the evaluation of the developed ANN forecasting models proper statistical evaluation indices are used. According to the results, the coefficient of the determination ranges from 0.285 up to 0.768 (mean hourly WP), from 0.227 up to 0.798 (maximum hourly WP) and from 0.025 up to 0.398 (minimum hourly WP). 
Furthermore, the proposed forecasting methodology shows that is able to give sufficient and adequate prognosis of WP by a wind turbine in a specific location 8 hours ahead. This will be a useful tool for the operator of a RES system in order to achieve a better monitoring and a better management of the whole system."],"format":[],"id":"50|dedup_wf_001::0a6525f420fd1f922125c3a1a280c8df","instance":[{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2017-02-23T20:11:53Z","refereed":"UNKNOWN","type":"Conference object","url":["http://digital-library.theiet.org/content/conferences/10.1049/cp.2016.1094?crawler=true","http://dx.doi.org/10.1049/cp.2016.1094"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"cc-by","refereed":"UNKNOWN","type":"Conference object","url":["https://zenodo.org/record/887339/files/Wind%20power%20forecasting%20using%20historical%20data%20and%20artificial%20neural%20networks%20modeling.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"refereed":"UNKNOWN","type":"Conference 
object","url":["http://digital-library.theiet.org/content/conferences/10.1049/cp.2016.1094","https://core.ac.uk/display/144851191","http://jglobal.jst.go.jp/public/20090422/201702260673071390","https://academic.microsoft.com/#/detail/2590295725"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2016-11-07","refereed":"UNKNOWN","type":"Conference object","url":["https://zenodo.org/record/887339"]}],"language":{"code":"und","label":"Undetermined"},"lastupdatetimestamp":1603727328518,"maintitle":"Wind power forecasting using historical data and artificial neural networks modeling","originalId":["10.1049/cp.2016.1094","2590295725","oai:zenodo.org:887339"],"pid":[{"scheme":"doi","value":"10.1049/cp.2016.1094"}],"projects":[{"acronym":"TILOS","code":"646529","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda__h2020::64c901e621f90b9a38badb1d78902205","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Technology Innovation for the Local Scale, Optimum Integration of Battery Energy Storage"}],"publicationdate":"2016-11-07","publisher":"Institution of Engineering and Technology","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"Wind Power, Forecasting, Artificial Neural Networks"}}],"type":"publication"} +{"author":[{"fullname":"Juan M. 
Traverso","name":"Juan M.","rank":1,"surname":"Traverso"},{"fullname":"Julien Bobe","name":"Julien","rank":2,"surname":"Bobe"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"}],"context":[{"code":"fam","label":"Fisheries and Aquaculture Management","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]},{"code":"mes","label":"European Marine Science","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":["Station commune de Recherches en Ichtyophysiologie, Biodiversité et Environnement (SCRIBE) ; Institut National de la Recherche Agronomique (INRA)","Institut Fédératif de Recherche - Génétique Fonctionnelle Agronomie et Santé (IFR 140 GFAS)","European Project: 222719,EC:FP7:KBBE,FP7-KBBE-2007-2A,LIFECYCLE(2009)","Station commune de Recherches en Ichtyophysiologie, Biodiversité et Environnement (SCRIBE) ; Institut National de la Recherche Agronomique (INRA)-IFR140"],"country":[{"code":"FR","label":"France","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional 
repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2020-10-17T09:56:33.515Z","description":[],"format":[],"id":"50|dedup_wf_001::0b9078b1d22faeab00e52a2796dbbad0","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"hostedby":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"publicationdate":"2009-07-18","refereed":"UNKNOWN","type":"Conference object","url":["https://hal.inrae.fr/hal-02754267"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"hostedby":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"publicationdate":"2009-07-18","refereed":"UNKNOWN","type":"Conference object","url":["https://hal.inrae.fr/hal-02754267"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"publicationdate":"2009-07-18","refereed":"UNKNOWN","type":"Conference object","url":["https://hal.inrae.fr/hal-02754267"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::e7b4be4856b6d6d7c3484bf403b44634","value":"Biology of 
Reproduction"},"publicationdate":"2009-07-01","refereed":"UNKNOWN","type":"Article","url":["http://academic.oup.com/biolreprod/article-pdf/81/Suppl_1/285/","http://dx.doi.org/10.1093/biolreprod/81.s1.285"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::e7b4be4856b6d6d7c3484bf403b44634","value":"Biology of Reproduction"},"refereed":"UNKNOWN","type":"Article","url":["https://academic.oup.com/biolreprod/article/2955191/The","https://academic.microsoft.com/#/detail/2594950426"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"The maternal contribution of oocyte-specific genes to embryo development in zebrafish: analysis by real-time PCR and knock-down using morpholino oligos","originalId":["oai:HAL:hal-02754267v1","10.1093/biolreprod/81.s1.285","2594950426"],"pid":[{"scheme":"doi","value":"10.1093/biolreprod/81.s1.285"}],"projects":[{"acronym":"LIFECYCLE","code":"222719","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::f3b07b1c9d0afbdb983a9b5820f4e34b","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Building a biological knowledge-base on fish lifecycles for competitive, sustainable European aquaculture"}],"publicationdate":"2009-07-18","publisher":"HAL CCSD","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"[SDV.BDLR]Life Sciences [q-bio]/Reproductive Biology"}}],"type":"publication"} +{"author":[{"fullname":"Pasquale Nino","name":"Pasquale","pid":{"id":{"scheme":"orcid","value":"0000-0002-7070-4586"}},"rank":1,"surname":"Nino"},{"fullname":"Guido D'Urso","name":"Guido","pid":{"id":{"scheme":"orcid","value":"0000-0002-0251-4668"}},"rank":2,"surname":"D'Urso"},{"fullname":"Salvatore Falanga 
Bolognesi","name":"Salvatore","pid":{"id":{"scheme":"orcid","value":"0000-0003-0134-2574"}},"rank":3,"surname":"Falanga Bolognesi"},{"fullname":"Giuseppe Pulighe","name":"Giuseppe","pid":{"id":{"scheme":"orcid","value":"0000-0002-6470-0984"}},"rank":4,"surname":"Pulighe"},{"fullname":"Carlo De Michele","name":"Carlo","pid":{"id":{"scheme":"orcid","value":"0000-0002-3797-850X"}},"rank":5,"surname":"De Michele"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9","value":"Sygma"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311","value":"Multidisciplinary Digital Publishing Institute"},{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},{"key":"10|opendoar____::d8c24ca8f23c562a5600876ca2a550ce","value":"Archivio della ricerca - Università degli studi di Napoli Federico II"}],"container":{"edition":"","ep":"14730","iss":"11","issnLinking":"","issnOnline":"2072-4292","issnPrinted":"","name":"Remote Sensing","sp":"14708","vol":"7"},"context":[{"code":"sdsn-gr","label":"SDSN - Greece","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":[],"country":[{"code":"IT","label":"Italy","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"","description":["The sustainable management of water resources plays a key role in Mediterranean viticulture, characterized by scarcity and competition of available water. 
This study focuses on estimating the evapotranspiration and crop coefficients of table grapes vineyards trained on overhead “tendone” systems in the Apulia region (Italy). Maximum vineyard transpiration was estimated by adopting the “direct” methodology for ETp proposed by the Food and Agriculture Organization in Irrigation and Drainage Paper No. 56, with crop parameters estimated from Landsat 8 and RapidEye satellite data in combination with ground-based meteorological data. The modeling results of two growing seasons (2013 and 2014) indicated that canopy growth, seasonal and 10-day sums evapotranspiration values were strictly related to thermal requirements and rainfall events. The estimated values of mean seasonal daily evapotranspiration ranged between 4.2 and 4.1 mm·d−1, while midseason estimated values of crop coefficients ranged from 0.88 to 0.93 in 2013, and 1.02 to 1.04 in 2014, respectively. The experimental evapotranspiration values calculated represent the maximum value in absence of stress, so the resulting crop coefficients should be used with some caution. 
It is concluded that the retrieval of crop parameters and evapotranspiration derived from remotely-sensed data could be helpful for downscaling to the field the local weather conditions and agronomic practices and thus may be the basis for supporting grape growers and irrigation managers."],"format":["application/pdf"],"id":"50|dedup_wf_001::0bc332de35b4eb0cc3d0c54b3ff22dd3","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9","value":"Sygma"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"license":"https://creativecommons.org/licenses/by/4.0/","refereed":"UNKNOWN","type":"Article","url":["http://dx.doi.org/10.3390/rs71114708"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"license":"https://creativecommons.org/licenses/by/4.0/","publicationdate":"2015-11-05","refereed":"UNKNOWN","type":"Article","url":["http://www.mdpi.com/2072-4292/7/11/14708/pdf","http://dx.doi.org/10.3390/rs71114708"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote 
Sensing"},"license":"cc-by","refereed":"UNKNOWN","type":"Article","url":["https://www.mdpi.com/2072-4292/7/11/14708/pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"refereed":"UNKNOWN","type":"Article","url":["https://dblp.uni-trier.de/db/journals/remotesensing/remotesensing7.html#VaninoPNMBD15","https://doi.org/10.3390/rs71114708","https://core.ac.uk/display/55147144","http://doi.org/10.3390/rs71114708","https://academic.microsoft.com/#/detail/2149946514"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311","value":"Multidisciplinary Digital Publishing Institute"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote Sensing"},"license":"http://creativecommons.org/licenses/by/3.0/","publicationdate":"2015-11-05","refereed":"UNKNOWN","type":"Other literature type","url":["http://dx.doi.org/10.3390/rs71114708"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"},"hostedby":{"key":"10|doajarticles::f7180a75ef642fcf5309759e0d2c431a","value":"Remote 
Sensing"},"publicationdate":"2015-11-01","refereed":"UNKNOWN","type":"Article","url":["http://www.mdpi.com/2072-4292/7/11/14708","https://doaj.org/toc/2072-4292"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::d8c24ca8f23c562a5600876ca2a550ce","value":"Archivio della ricerca - Università degli studi di Napoli Federico II"},"hostedby":{"key":"10|opendoar____::d8c24ca8f23c562a5600876ca2a550ce","value":"Archivio della ricerca - Università degli studi di Napoli Federico II"},"publicationdate":"2015-01-01","refereed":"UNKNOWN","type":"Article","url":["http://hdl.handle.net/11588/637935"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Estimation of Evapotranspiration and Crop Coefficients of Tendone Vineyards Using Multi-Sensor Remote Sensing Data in a Mediterranean Environment","originalId":["","rs71114708","10.3390/rs71114708","2149946514","oai:mdpi.com:/2072-4292/7/11/14708/","oai:doaj.org/article:939730c0b55b4a8bbaa150970587d25a","oai:www.iris.unina.it:11588/637935"],"pid":[{"scheme":"doi","value":"10.3390/rs71114708"}],"projects":[{"acronym":"FATIMA","code":"633945","funder":{"fundingStream":"H2020","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda__h2020::b0e820e327215ca5b86884d536a17fef","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"FArming Tools for external nutrient Inputs and water MAnagement"}],"publicationdate":"2015-11-05","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"evapotranspiration"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"crop coefficient"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"leaf area 
index"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Landsat 8"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"RapidEye"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"remote sensing"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"vineyards"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"table grapes"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Science"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Q"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"Crop coefficient; Evapotranspiration; Landsat 8; Leaf area index; RapidEye; Remote sensing; Table grapes; Vineyards; Earth and Planetary Sciences (all)"}}],"type":"publication"} +{"author":[{"fullname":"Rousselet, L.","name":"L.","pid":{"id":{"scheme":"orcid","value":"0000-0001-5016-3658"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":1,"surname":"Rousselet"},{"fullname":"Doglioli, A. M.","name":"A. 
M.","pid":{"id":{"scheme":"orcid","value":"0000-0003-1309-9954"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":2,"surname":"Doglioli"},{"fullname":"Verneil, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0002-8344-7953"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":3,"surname":"Verneil"},{"fullname":"Pietri, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0003-1111-9640"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":4,"surname":"Pietri"},{"fullname":"Della Penna, A.","name":"A.","pid":{"id":{"scheme":"orcid","value":"0000-0002-7579-3610"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":5,"surname":"Della Penna"},{"fullname":"Berline, L.","name":"L.","pid":{"id":{"scheme":"orcid","value":"0000-0002-5831-7399"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":6,"surname":"Berline"},{"fullname":"Marrec, P.","name":"P.","pid":{"id":{"scheme":"orcid","value":"0000-0002-7811-4150"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":7,"surname":"Marrec"},{"fullname":"Grégori, G.","name":"G.","rank":8,"surname":"Grégori"},{"fullname":"Thyssen, M.","name":"M.","rank":9,"surname":"Thyssen"},{"fullname":"Carlotti, F.","name":"F.","rank":10,"surname":"Carlotti"},{"fullname":"Barrillon, S.","name":"S.","rank":11,"surname":"Barrillon"},{"fullname":"Simon‐Bot, F.","name":"F.","rank":12,"surname":"Simon‐bot"},{"fullname":"Bonal, M.","name":"M.","pid":{"id":{"scheme":"orcid","value":"0000-0002-2077-3049"},"provenance":{"provenance":"Harvested","trust":"0.9"}},"rank":13,"surname":"Bonal"},{"fullname":"d'Ovidio, F.","name":"F.","rank":14,"surname":"D Ovidio"},{"fullname":"Petrenko, A.","name":"A.","rank":15,"surname":"Petrenko"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en 
Ligne"},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"ORCID"},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"}],"context":[{"code":"mes","label":"European Marine Science","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"contributor":["Institut méditerranéen d'océanologie (MIO) ; Centre National de la Recherche Scientifique (CNRS)-Université de Toulon (UTLN)-Aix Marseille Université (AMU)-Institut de Recherche pour le Développement (IRD)","New York University [Abu Dhabi] ; NYU System (NYU)","Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Muséum National d'Histoire Naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)-Institut national des sciences de l'Univers (INSU - CNRS)-Sorbonne Université (SU)-Centre National de la Recherche Scientifique (CNRS)","University of Washington [Seattle]","Université Libre de Bruxelles [Bruxelles] (ULB)","ANR-09-CEXC-0006,FOCEA,Excellence pour une synergie observation-modèle et l'évaluation de l'état écologique de la Méditerranée Nord Occidentale(2009)","ANR: 11-IDEX-0001,Amidex,INITIATIVE 
D'EXCELLENCE AIX MARSEILLE UNIVERSITE(2011)","ANR: 11-LABX-0061,OTMed,Objectif Terre : Bassin Méditerranéen(2011)","European Project: 624170,EC:FP7:PEOPLE,FP7-PEOPLE-2013-IEF,SEAQUEST(2015)","Institut méditerranéen d'océanologie (MIO) ; Institut de Recherche pour le Développement (IRD)-Aix Marseille Université (AMU)-Institut national des sciences de l'Univers (INSU - CNRS)-Université de Toulon (UTLN)-Centre National de la Recherche Scientifique (CNRS)","Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)","Université libre de Bruxelles (ULB)","Processus et interactions de fine échelle océanique (PROTEO) ; Laboratoire d'Océanographie et du Climat : Expérimentations et Approches Numériques (LOCEAN) ; Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)-Sorbonne Université (SU)-Institut national des sciences de l'Univers (INSU - CNRS)-Centre National de la Recherche Scientifique (CNRS)-Muséum national d'Histoire naturelle (MNHN)-Institut de Recherche pour le Développement (IRD)","ANR-11-IDEX-0001-02/11-IDEX-0001,AMIDEX,AMIDEX(2011)"],"country":[{"code":"FR","label":"France","provenance":{"provenance":"Propagation of country to result collected from datasources of type institutional repositories","trust":"0.85"}}],"coverage":[],"dateofcollection":"2019-10-11T09:44:42.348Z","description":["International audience; Vertical velocities can be estimated indirectly from in situ observations by theoretical frameworks like the-equation. 
Direct measures of vertical exchanges are challenging due to their typically ephemeral spatiotemporal scales. In this study we address this problem with an adaptive sampling strategy coupling various biophysical instruments. We analyze the 3-D organization of a cyclonic mesoscale structure finely sampled during the Observing Submesoscale Coupling At High Resolution cruise in the Ligurian Sea during fall 2015. The observations, acquired with a moving vessel profiler, highlight a subsurface low-salinity layer (≃50 m), as well as rising isopycnals, generated by geostrophic cyclonic circulation, in the structure's center. Reconstructed 3-D fields of density and horizontal velocities are used to estimate the vertical velocity field down to 250 m by applying the adiabatic QG-equation, for the first time in this region. The vertical motions are characterized by multipolar patterns of downward and upward velocities on the edges of the structure and significantly smaller vertical velocities in its center. Both the 3-D distribution of particles (size ≥100 μm), measured with a laser optical plankton counter, and the Synechococcus and Prochlorococcus abundances (cell per cubic meter) measured by flow cytometry are consistent with the 3-D velocity field. In particular, a secondary vertical recirculation is identified that upwells particles (from 250 to 100 m) along isohalines to the structure's center. 
Besides demonstrating the effect of vertical patterns on biogeochemical distributions, this case study suggests to use particle matter as a tracer to assess physical dynamics."],"format":["application/pdf"],"id":"50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795/document","https://hal-amu.archives-ouvertes.fr/hal-02124795/file/Vertical%20motions%20and%20their%20effects%20on%20a%20biogeochemical%20tracer%20in%20a%20cyclonic%20structure%20finely%20observed%20in%20the%20Ligurian%20Sea.pdf"]},{"accessright":{"code":"c_16ec","label":"RESTRICTED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research 
Oceans"},"license":"http://onlinelibrary.wiley.com/termsAndConditions#vor","publicationdate":"2019-04-23T07:08:10Z","refereed":"UNKNOWN","type":"Article","url":["https://onlinelibrary.wiley.com/doi/pdf/10.1029/2018JC014392","https://onlinelibrary.wiley.com/doi/full-xml/10.1029/2018JC014392","http://dx.doi.org/10.1029/2018jc014392"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research Oceans"},"refereed":"UNKNOWN","type":"Article","url":["https://archimer.ifremer.fr/doc/00490/60214/63567.pdf"]},{"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph"},"hostedby":{"key":"10|issn___print::3e65fd758a9d3e24027c0cb2822a79f2","value":"Journal of Geophysical Research 
Oceans"},"refereed":"UNKNOWN","type":"Article","url":["https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2018JC014392","https://hal-amu.archives-ouvertes.fr/hal-02124795v2","https://academic.microsoft.com/#/detail/2941945454"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},"hostedby":{"key":"10|opendoar____::71560ce98c8250ce57a6a970c9991a5f","value":"HAL-IRD"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"hostedby":{"key":"10|opendoar____::7e7757b1e12abcb736ab9a754ffb617a","value":"Hyper Article en Ligne"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2019-06-07","refereed":"UNKNOWN","type":"Other literature 
type","url":["http://dx.doi.org/10.1029/2018jc014392"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"hostedby":{"key":"10|opendoar____::1534b76d325a8f591b52d302e7181331","value":"Mémoires en Sciences de l'Information et de la Communication"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},"hostedby":{"key":"10|opendoar____::18bb68e2b38e4a8ce7cf4f6b2625768c","value":"Hal-Diderot"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL AMU"},"hostedby":{"key":"10|opendoar____::2d2c8394e31101a261abf1784302bf75","value":"HAL 
AMU"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"hostedby":{"key":"10|opendoar____::88855547570f7ff053fff7c54e5148cc","value":"HAL Descartes"},"license":"http://hal.archives-ouvertes.fr/licences/copyright/","publicationdate":"2019-04-23","refereed":"UNKNOWN","type":"Article","url":["https://hal-amu.archives-ouvertes.fr/hal-02124795","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/document","https://hal-amu.archives-ouvertes.fr/hal-02124795v2/file/HAL_Rousselet_et_al_2019_vertical.pdf"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"},"hostedby":{"key":"10|opendoar____::8f14e45fceea167a5a36dedd4bea2543","value":"ArchiMer - Institutional Archive of Ifremer"},"publicationdate":"2019-06-01","refereed":"UNKNOWN","type":"Other literature type","url":["https://archimer.ifremer.fr/doc/00490/60214/"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1603727328518,"maintitle":"Vertical motions and their effects on a biogeochemical tracer in a cyclonic structure finely observed in the Ligurian 
Sea","originalId":["oai:HAL:hal-02124795v1","10.1029/2018JC014392","10.1029/2018jc014392","2941945454","oai:HAL:hal-02124795v2","oai:archimer.ifremer.fr:60214"],"pid":[{"scheme":"doi","value":"10.1029/2018JC014392"}],"projects":[{"acronym":"SEAQUEST","code":"624170","funder":{"fundingStream":"FP7","jurisdiction":"EU","name":"European Commission","shortName":"EC"},"id":"40|corda_______::a2aa3e7ebdf06a249bbedc0cef87e602","provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"title":"Meso and Sub-mesoscale Physico-biogeochemical Dynamics in a Coastal NW Mediterranean Sea: Quantifying and Understanding Ecosystem Structure and Transport"}],"publicationdate":"2019-04-23","publisher":"HAL CCSD","subjects":[{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"high‐resolution reconstructions of 3‐D fields"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"vertical velocities estimated with ω‐equation"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"particle distribution as a tracer for vertical advection"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"UNKNOWN","value":"[SDU.OCEAN]Sciences of the Universe [physics]/Ocean, Atmosphere"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"high-resolution reconstructions of 3-D fields"}},{"provenance":{"provenance":"Harvested","trust":"0.9"},"subject":{"scheme":"keyword","value":"vertical velocities estimated with omega-equation"}}],"type":"publication"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation new file mode 100644 index 000000000..3cb855c1a --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation @@ -0,0 +1,8 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|cihr________::1e5e62235d094afd01cd56e65112fc63"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"40|nwo_________::dc69ada721bf21ed51055b6421850d73"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"40|nih_________::031bb5f2f70239b3210eda38b2079f67"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"40|nsf_________::03748bcb5d754c951efec9700e18a56d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"40|snsf________::50cb15ff7a6a3f8531f063770179e346"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/software b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/software new file mode 100644 index 000000000..e69de29bb diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json new file mode 100644 index 000000000..fda631c2f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json @@ -0,0 +1,32 @@ 
+{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 
57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} +{"author":[{"fullname":"Ojala, Marja","name":"Marja","pid":[],"rank":1,"surname":"Ojala"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:02:41.238Z","dateoftransformation":"2020-10-14T14:15:44.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::530df3a532f711fd65c9248f9e575b60","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/111b446c-4dd9-4a3b-8609-fb44c461fe03","https://doi.org/10.1002/jhrc.1240161114"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720010422,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-01-01T02:18:57Z","harvestDate":"2020-10-14T13:02:41.238Z","identifier":"oai:cris.vtt.fi:publications/111b446c-4dd9-4a3b-8609-fb44c461fe03","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/111b446c-4dd9-4a3b-8609-fb44c461fe03"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1002/jhrc.1240161114"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ojala , M 1993 , ' 
Simultaneous separation and determination of chlorobenzenes, PCBs, and chlorophenols using silica gel fractionation and GC-ECD analysis ' , HRC Journal of High Resolution Chromatography , vol. 16 , no. 11 , pp. 679 - 682 . https://doi.org/10.1002/jhrc.1240161114"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/b1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"B1 Unrefereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Simultaneous separation and determination of chlorobenzenes, PCBs, and chlorophenols using silica gel fractionation and GC-ECD analysis"}]} +{"author":[{"fullname":"Flanigon, James","name":"James","pid":[],"rank":1,"surname":"Flanigon"},{"fullname":"Kamali-Moghaddam, Masood","name":"Masood","pid":[],"rank":2,"surname":"Kamali-Moghaddam"},{"fullname":"Burbulis, Ian","name":"Ian","pid":[],"rank":3,"surname":"Burbulis"},{"fullname":"Annink, Carla","name":"Carla","pid":[],"rank":4,"surname":"Annink"},{"fullname":"Steffen, Martin","name":"Martin","pid":[],"rank":5,"surname":"Steffen"},{"fullname":"Oeth, Paul","name":"Paul","pid":[],"rank":6,"surname":"Oeth"},{"fullname":"Brent, 
Roger","name":"Roger","pid":[],"rank":7,"surname":"Brent"},{"fullname":"van den Boom, Dirk","name":"Dirk","pid":[],"rank":8,"surname":"Den Boom"},{"fullname":"Landegren, Ulf","name":"Ulf","pid":[],"rank":9,"surname":"Landegren"},{"fullname":"Cantor, Charles","name":"Charles","pid":[],"rank":10,"surname":"Cantor"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2020-10-14T12:59:43.293Z","dateoftransformation":"2020-10-14T14:18:11.463Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Multiplex protein quantification has been constrained by issues of assay specificity, sensitivity and throughput. This research presents a novel approach that overcomes these limitations using antibody–oligonucleotide conjugates for immuno-polymerase chain reaction (immuno-PCR) or proximity ligation, coupled with competitive PCR and MALDI-TOF mass spectrometry. 
Employing these combinations of technologies, we demonstrate multiplex detection and quantification of up to eight proteins, spanning wide dynamic ranges from femtomolar concentrations, using only microliter sample volumes."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::57ce39c409b63b6db7d3dec83b5a80d7","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/1f6a6cfe-8e22-4074-abae-6b094ecac56b","https://doi.org/10.1016/j.nbt.2012.11.003"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720017299,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-01-01T01:49:46Z","harvestDate":"2020-10-14T12:59:43.293Z","identifier":"oai:cris.vtt.fi:publications/1f6a6cfe-8e22-4074-abae-6b094ecac56b","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/1f6a6cfe-8e22-4074-abae-6b094ecac56b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.nbt.2012.11.003"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Flanigon , J , 
Kamali-Moghaddam , M , Burbulis , I , Annink , C , Steffen , M , Oeth , P , Brent , R , van den Boom , D , Landegren , U & Cantor , C 2013 , ' Multiplex protein detection with DNA readout via mass spectrometry ' , New Biotechnology , vol. 30 , no. 2 , pp. 153-158 . https://doi.org/10.1016/j.nbt.2012.11.003"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Multiplex protein detection with DNA readout via mass spectrometry"}]} +{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 
."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} +{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 
10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} +{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 
5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} +{"author":[{"fullname":"Valo, Matti","name":"Matti","pid":[],"rank":1,"surname":"Valo"},{"fullname":"Debarberis, L.","name":"L.","pid":[],"rank":2,"surname":"Debarberis"},{"fullname":"Kryukov, A.","name":"A.","pid":[],"rank":3,"surname":"Kryukov"},{"fullname":"Chernobaeva, A.","name":"A.","pid":[],"rank":4,"surname":"Chernobaeva"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"dateofcollection":"2020-10-14T13:04:23.268Z","dateoftransformation":"2020-10-14T16:25:31.649Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The dependence of the recovery of the transition temperature shift after annealing (475 °C, 100 h) on copper and phosphorus contents has been studied on irradiated reactor pressure vessel (RPV) materials. A set of model alloys with low nickel content, lower than 0.2 mass%, was used for the study. Copper and phosphorus contents were varied in a wide range: 0.005–0.99 and 0.002–0.039 mass%, respectively. Recovery efficiency has been estimated by the value of residual embrittlement after annealing, measured in terms of a shift in transition temperature (ΔTKres). A comparison of the results obtained on model alloys with data for VVER-440 RPV materials has also been carried out. Comparative analysis has confirmed the conclusion that ΔTKres is independent of phosphorus content while the effect of copper on ΔTKres is not significant for typical VVER-440 RPV materials with a typical range of Cu contents between 0.10 and 0.24 mass%. 
However, for model alloys with a wider range of copper content, copper mainly controls the value of ΔTKres."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::d5fd949409bb3843b43c4b4ef2b9777e","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/0ed9fc8b-a19d-488f-bbe9-b72d46e93cf5","https://doi.org/10.1016/j.ijpvp.2007.05.002"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720188893,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-02-05T01:22:31Z","harvestDate":"2020-10-14T13:04:23.268Z","identifier":"oai:cris.vtt.fi:publications/0ed9fc8b-a19d-488f-bbe9-b72d46e93cf5","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/0ed9fc8b-a19d-488f-bbe9-b72d46e93cf5"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"prove
nanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.ijpvp.2007.05.002"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Valo , M , Debarberis , L , Kryukov , A & Chernobaeva , A 2008 , ' Copper and phosphorus effect on residual embrittlement of irradiated model alloys and RPV steels after annealing ' , International Journal of Pressure Vessels and Piping , vol. 85 , no. 8 , pp. 575-579 . 
https://doi.org/10.1016/j.ijpvp.2007.05.002"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Annealing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Irradiation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Recovery"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transition 
temperature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Copper"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Phosphorus"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Copper and phosphorus effect on residual embrittlement of irradiated model alloys and RPV steels after annealing"}]} +{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 
260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} +{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, 
Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. 
Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. 
The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1039/C0SM01114B"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , 
Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 
Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} +{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, 
Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} +{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, 
W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. 
A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_cl
assification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} +{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, 
A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} +{"author":[{"fullname":"Almeida, Henrique","name":"Henrique","pid":[],"rank":1,"surname":"Almeida"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::f54ac940e822726208c7b2dd526e4849","value":" Revistas Científicas da Universidade Católica 
Portuguesa"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2004-01-01"},"dateofcollection":"2020-02-25T13:35:11.629Z","dateoftransformation":"2020-09-03T20:15:27.783Z","description":[],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|a587306331dd::0f52c20c167a5494879e0614164585f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::f54ac940e822726208c7b2dd526e4849","value":" Revistas Científicas da Universidade Católica Portuguesa"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2004-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::f54ac940e822726208c7b2dd526e4849","value":" Revistas Científicas da Universidade Católica 
Portuguesa"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by/4.0"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistas.ucp.pt/index.php/mathesis/article/view/3918"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0872-0215","issnPrinted":"0872-0215","name":"Máthesis","sp":"","vol":""},"language":{"classid":"por","classname":"Portuguese","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720006001,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistas.ucp.pt%2Findex.php%2Findex%2Foai","datestamp":"2020-02-20T15:56:11Z","harvestDate":"2020-02-25T13:35:11.629Z","identifier":"oai:ojs.revistas.ucp.pt:article/3918","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs.revistas.ucp.pt:article/3918"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.34632/mathesis.2004.3918"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Universidade Católica Portuguesa"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Máthesis; n. 
13 (2004); 147-157"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0872-0215"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"10.34632/mathesis.2004.n13"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Aquilino Ribeiro e a crítica: reconstituição de um episódio polémico"}]} +{"author":[{"fullname":"Sintonen, Matti","name":"Matti","pid":[],"rank":1,"surname":"Sintonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-03-22"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:36.322Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Edellisissä muistikuvissa käsittelin akateemikkoja Eino Kaila, G. H. von Wright ja Oiva Ketonen. 
Nyt on vuorossa keväällä 2017 nimitetty Ilkka Niiniluoto."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::3afa40ff6212f33711b2b125a05da061","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/69941"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"36"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720418888,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-03-22T06:05:40Z","harvestDate":"2020-08-27T12:58:53.52Z","identifier":"oai:journal.fi:article/69941","metadataNamespace":""}},"originalId":["oai:journal.fi:article/69941"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Muistikuvia"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Täyskäsi – Ilkka Niiniluoto"}]} +{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} +{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","s
chemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} +{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, 
Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. 
Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss"
:"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen 
Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} +{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica 
Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica 
Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} +{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. 
Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} +{"author":[{"fullname":"Tuominen, Kimmo","name":"Kimmo","pid":[],"rank":1,"surname":"Tuominen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-03"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:26.966Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::79ae333a14362379d40ce2444a68b51f","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-03"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/inf/article/view/1478"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"75","iss":"","issnLinking":"","issnOnline":"1797-9129","issnPrinted":"1797-9137","name":"Informaatiotutkimus","sp":"64","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720465388,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T13:11:48.927Z","identifier":"oai:journal.fi:article/1478","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1478"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","sche
meid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Informaatiotutkimuksen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Informaatiotutkimus"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Yksilö tiedonhankkijana : Sense-making -teorian näkökulma tiedonhankintaprosessiin"}]} +{"author":[{"fullname":"Coleman, David","name":"David","pid":[],"rank":1,"surname":"Coleman"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:51.888Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Radical changes in living arrangements, in sexual habits and in the position of marriage in Europe have arisen, very unevenly, since the 1960s and in some regions in scarcely more than a decade. Cohabitation before marriage is normal – even universal – in many countries, with the popularity of marriage falling as its mean age rises to beyond the highest levels hitherto recorded. Divorce has been legalised and in most cases made readily accessible. However, although not as firm a demarcation as once believed, Hajnal’s line separating East and West has not yet been erased from the map of contemporary Europe. 
The article describes patterns and trends in partnership in Europe, including trends in marriage rates, divorce rates, the spread of cohabitation, LAT-relations and of births outside marriage, and tries to account for them."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::9fd9c0e6da2caa6ac1d655079482b428","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/fypr/article/view/40927"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"49","iss":"","issnLinking":"","issnOnline":"1796-6191","issnPrinted":"1796-6183","name":"Finnish Yearbook of Population 
Research","sp":"5","vol":"48"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720495570,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2020-06-24T08:28:25Z","harvestDate":"2020-08-27T15:11:11.742Z","identifier":"oai:journal.fi:article/40927","metadataNamespace":""}},"originalId":["oai:journal.fi:article/40927"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.23979/fypr.40927"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"the Family Federation of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Yearbook of Population 
Research"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Articles"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Partnership in Europe; its Variety, Trends and Dissolution"}]} +{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. 
ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia 
Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book 
Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} +{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. 
Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} +{"author":[{"fullname":"Kunnas, Niina","name":"Niina","pid":[],"rank":1,"surname":"Kunnas"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-10"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:09:19.343Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Arvioitu teos Anneli Sarhimaa: Vaietut ja vaiennetut. Karjalankieliset karjalaiset Suomessa. Tietolipas 256. Helsinki: Suomalaisen Kirjallisuuden Seura 2017. 297 s. 
isbn 978-952-222-890-1."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::db6812b4988f5dcf9b65c31e4ece2d53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-10"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/69682"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"","vol":"122"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720388397,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-07-22T21:52:02Z","harvestDate":"2020-10-17T22:57:54.316Z","identifier":"oai:journal.fi:article/69682","metadataNamespace":""}},"originalId":["oai:journal.fi:article/69682"],"pid
":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.23982/vir.69682"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:reposito
ry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuoretta tietoa Suomessa puhuttavasta karjalan kielestä"}]} +{"author":[{"fullname":"Ben Romdhanne Bilel","pid":[],"rank":1},{"fullname":"Nikaein Navid","pid":[],"rank":2},{"fullname":"Knopp Raymond","pid":[],"rank":3},{"fullname":"Bonnet Christian","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-10-31"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.131Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::72f7ad968fa42cfbf0d3d7b245e43477","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital 
Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-10-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2069102"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720262482,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1145/2069087.2069102"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"OpenAirInterface large-scale wireless emulation platform and methodology"}]} +{"author":[{"fullname":"Gilles Barthe","pid":[],"rank":1},{"fullname":"Tamara Rezk","pid":[],"rank":2},{"fullname":"Alejandro Russo","pid":[],"rank":3},{"fullname":"Andrei Sabelfeld","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-07-30"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.575Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::cbd6814ee33b6357c7cea7c008a72b80","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital 
Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-07-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=1805977"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720264785,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1145/1805974.1805977"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Security of multithreaded programs by compilation"}]} +{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital 
Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} +{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, 
Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. 
Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. 
Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic 
non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie 
non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup 
control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric 
reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} +{"author":[{"fullname":"Manea, Florin","name":"Florin","pid":[],"rank":1,"surname":"Manea"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-10-01"},"dateofcollection":"2017-09-25T17:25:29.95Z","dateoftransformation":"2019-01-23T10:12:38.35Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::020ca6ee0ae16e1e9c1405207087a671","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/j.tcs.2012.06.029"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"79","iss":"","issnLinking":"","issnOnline":"","issnPrinted":"0304-3975","name":"Theoretical Computer Science","sp":"65","vol":"456"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720301847,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:25:29.95Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.tcs.2012.06.029"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theoretical Computer Science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Computer 
Science(all)"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Complexity results for deciding Networks of Evolutionary Processors"}]} +{"author":[{"fullname":"Yanlong, Yin","name":"Yin","pid":[],"rank":1,"surname":"Yanlong"},{"fullname":"Mingkai, Yao","name":"Yao","pid":[],"rank":2,"surname":"Mingkai"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-10-01"},"dateofcollection":"2017-09-25T17:16:18.455Z","dateoftransformation":"2019-01-23T10:13:00.54Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::02715dd9fc09b87966f3bad613aec7f9","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/j.jacc.2014.06.769"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"16","issnLinking":"","issnOnline":"","issnPrinted":"0735-1097","name":"Journal of the American College of 
Cardiology","sp":"","vol":"64"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720310721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:16:18.455Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.jacc.2014.06.769"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Cardiology and Cardiovascular 
Medicine"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"GW25-e1474 Radiofrequency catheter ablation via radial artery approach for left-sided atrioventricuar accessory pathways"}]} +{"author":[{"fullname":"Reed, Jonathan C","name":"Jonathan C.","pid":[],"rank":1,"surname":"Reed"},{"fullname":"Kasschau, Kristin D","name":"Kristin D.","pid":[],"rank":2,"surname":"Kasschau"},{"fullname":"Prokhnevsky, Alexey I","name":"Alexey I.","pid":[],"rank":3,"surname":"Prokhnevsky"},{"fullname":"Gopinath, Kodetham","name":"Kodetham","pid":[],"rank":4,"surname":"Gopinath"},{"fullname":"Pogue, Gregory P","name":"Gregory P.","pid":[],"rank":5,"surname":"Pogue"},{"fullname":"Carrington, James C","name":"James C.","pid":[],"rank":6,"surname":"Carrington"},{"fullname":"Dolja, Valerian V","name":"Valerian V.","pid":[],"rank":7,"surname":"Dolja"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-02-01"},"dateofcollection":"2017-09-25T17:22:28.969Z","dateoftransformation":"2019-01-23T10:13:50.503Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::035f21f0844d85933ada45818ab13f5d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-02-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/s0042-6822(02)00051-x"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"209","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0042-6822","name":"Virology","sp":"203","vol":"306"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720329442,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:22:28.969Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/s0042-6822(02)00051-x"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Virology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suppressor of RNA silencing encoded by Beet yellows virus"}]} +{"author":[{"fullname":"Miller, R.K","name":"R. 
K.","pid":[],"rank":1,"surname":"Miller"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1968-05-01"},"dateofcollection":"2017-09-25T17:23:20.638Z","dateoftransformation":"2019-01-23T10:16:13.468Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::06505050e503a2e7f9167ba704fbeaac","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1968-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/0022-247x(68)90176-5"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"340","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0022-247X","name":"Journal of Mathematical Analysis and Applications","sp":"319","vol":"22"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720390392,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:23:20.638Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0022-247x(68)90176-5"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Applied Mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Analysis"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main 
title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"On Volterra integral equations with nonnegative integrable resolvents"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json new file mode 100644 index 000000000..6478a01ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json @@ -0,0 +1,30 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"20|doajarticles::b9e2aa37baa5c9b72d7dee964a442bda"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"20|doajarticles::03748bcb5d754c951efec9700e18a56d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"20|doajarticles::50cb15ff7a6a3f8531f063770179e346"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"20|doajarticles::ffc1811633b3222e4764c7b0517f83e8"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"20|dedup_wf_001::4e6c928fef9851b37ec73f4f6daca35b"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"20|dedup_wf_001::846b777af165fef7c904a81712a83b66"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::fbf7592ddbf2ad3cc0ed70c0f2e1d67c","subRelType":"provision","target":"20|dedup_wf_001::1b965e2c0c53e5526d269d63bcfa0ae6"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::fd4c399077127f0ba09b5205e2b78406","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|infrastruct_::f66f1bd369679b5b077dcdf006089556","value":"OpenAIRE"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|openaire____::8f991165fae922e29ad55d592f568464","subRelType":"provision","target":"20|openaire____::ec653e804967133b9436fdd30d3ff51d"} 
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::16d11e9595188dbad0418a85f0351aba","subRelType":"provision","target":"20|opendoar____::041abd8c990fc531ab9bd2674a0e2725"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::46d3f6029f6170ebccb28945964d09bf","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::7501e5d4da87ac39d782741cd794002d","subRelType":"provision","target":"20|dedup_wf_001::04e2c34ef4daa411ff2497afc807b612"} 
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::75b9b6dc7fe44437c6e0a69fd863dbab","subRelType":"provision","target":"20|dedup_wf_001::ad30fbc9b3b6f5370e59e58c456b7e19"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::d35b05a832e2bb91f110d54e34e2da79","subRelType":"provision","target":"20|opendoar____::589618708434cfc5b830601ac4b339ee"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::fc2e6a440b94f64831840137698021e1","subRelType":"provision","target":"20|opendoar____::a82c7e358792e0018235b7f196fec4ed"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::128284e3cc130ee9c68c6955bf4d1385","subRelType":"provision","target":"20|dedup_wf_001::a9651cfb29790a31c580ccb5bffa9349"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::48b3cc4b8c5951621730829c60b1c205","subRelType":"provision","target":"20|dedup_wf_001::5bc6fca7649010470f1cc11f6675ffb3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::8e7a079ee6d2d4933db9b898c789c2f8","subRelType":"provision","target":"20|dedup_wf_001::0b0ae5b38aedc082a8b089abdf3b752f"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::b26c86eba2b3ad8b242a93b581e6ec8e","subRelType":"provision","target":"20|re3data_____::e841a40265d8d0a6739ac71f56328da3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::cafe7980294aa5f935f433e7c8aab844","subRelType":"provision","target":"20|dedup_wf_001::2806db65ba8029ee196679cad067eff2"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1603564783812,"properties":[],"relClass":"isProducedBy","relType":"resultOrganization","target":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","source":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::645123c3fe7bab557c36f0f9bb02a4cd"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::fecf4f862a6b40dd2ccb1abc8fed5bc5"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::68ce7288b1b036f73a1ff951c6524eba"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::b733217d1cd609001dd3c75af419d872"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::c33dee8231ad0374caf93e52c5a473e5"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json new file mode 100644 index 000000000..fda631c2f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json @@ -0,0 +1,32 @@ +{"author":[{"fullname":"Niskala, Eino","name":"Eino","pid":[],"rank":1,"surname":"Niskala"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.375Z","dateoftransformation":"2020-10-14T13:55:20.918Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::2cfa9f434e854612c7cbdeb43433ac24","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719957520,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-30T11:16:46Z","harvestDate":"2020-10-14T13:09:13.375Z","identifier":"oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/a3a29ce0-cdf2-47fa-980c-078573244d3c"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Niskala , E 1988 , ' Puutalon ulkovaipan korjaus ' , Kodinrakentaja , no. 3 , pp. 
57-60 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puutalon ulkovaipan korjaus"}]} +{"author":[{"fullname":"Ojala, Marja","name":"Marja","pid":[],"rank":1,"surname":"Ojala"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:02:41.238Z","dateoftransformation":"2020-10-14T14:15:44.877Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::530df3a532f711fd65c9248f9e575b60","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/111b446c-4dd9-4a3b-8609-fb44c461fe03","https://doi.org/10.1002/jhrc.1240161114"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720010422,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-01-01T02:18:57Z","harvestDate":"2020-10-14T13:02:41.238Z","identifier":"oai:cris.vtt.fi:publications/111b446c-4dd9-4a3b-8609-fb44c461fe03","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/111b446c-4dd9-4a3b-8609-fb44c461fe03"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1002/jhrc.1240161114"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ojala , M 1993 , ' 
Simultaneous separation and determination of chlorobenzenes, PCBs, and chlorophenols using silica gel fractionation and GC-ECD analysis ' , HRC Journal of High Resolution Chromatography , vol. 16 , no. 11 , pp. 679 - 682 . https://doi.org/10.1002/jhrc.1240161114"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/b1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"B1 Unrefereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Simultaneous separation and determination of chlorobenzenes, PCBs, and chlorophenols using silica gel fractionation and GC-ECD analysis"}]} +{"author":[{"fullname":"Flanigon, James","name":"James","pid":[],"rank":1,"surname":"Flanigon"},{"fullname":"Kamali-Moghaddam, Masood","name":"Masood","pid":[],"rank":2,"surname":"Kamali-Moghaddam"},{"fullname":"Burbulis, Ian","name":"Ian","pid":[],"rank":3,"surname":"Burbulis"},{"fullname":"Annink, Carla","name":"Carla","pid":[],"rank":4,"surname":"Annink"},{"fullname":"Steffen, Martin","name":"Martin","pid":[],"rank":5,"surname":"Steffen"},{"fullname":"Oeth, Paul","name":"Paul","pid":[],"rank":6,"surname":"Oeth"},{"fullname":"Brent, 
Roger","name":"Roger","pid":[],"rank":7,"surname":"Brent"},{"fullname":"van den Boom, Dirk","name":"Dirk","pid":[],"rank":8,"surname":"Den Boom"},{"fullname":"Landegren, Ulf","name":"Ulf","pid":[],"rank":9,"surname":"Landegren"},{"fullname":"Cantor, Charles","name":"Charles","pid":[],"rank":10,"surname":"Cantor"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"2020-10-14T12:59:43.293Z","dateoftransformation":"2020-10-14T14:18:11.463Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Multiplex protein quantification has been constrained by issues of assay specificity, sensitivity and throughput. This research presents a novel approach that overcomes these limitations using antibody–oligonucleotide conjugates for immuno-polymerase chain reaction (immuno-PCR) or proximity ligation, coupled with competitive PCR and MALDI-TOF mass spectrometry. 
Employing these combinations of technologies, we demonstrate multiplex detection and quantification of up to eight proteins, spanning wide dynamic ranges from femtomolar concentrations, using only microliter sample volumes."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::57ce39c409b63b6db7d3dec83b5a80d7","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/1f6a6cfe-8e22-4074-abae-6b094ecac56b","https://doi.org/10.1016/j.nbt.2012.11.003"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720017299,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-01-01T01:49:46Z","harvestDate":"2020-10-14T12:59:43.293Z","identifier":"oai:cris.vtt.fi:publications/1f6a6cfe-8e22-4074-abae-6b094ecac56b","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/1f6a6cfe-8e22-4074-abae-6b094ecac56b"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.nbt.2012.11.003"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Flanigon , J , 
Kamali-Moghaddam , M , Burbulis , I , Annink , C , Steffen , M , Oeth , P , Brent , R , van den Boom , D , Landegren , U & Cantor , C 2013 , ' Multiplex protein detection with DNA readout via mass spectrometry ' , New Biotechnology , vol. 30 , no. 2 , pp. 153-158 . https://doi.org/10.1016/j.nbt.2012.11.003"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Multiplex protein detection with DNA readout via mass spectrometry"}]} +{"author":[{"fullname":"Ikonen, Kari","name":"Kari","pid":[],"rank":1,"surname":"Ikonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"dateofcollection":"2020-10-14T13:13:18.619Z","dateoftransformation":"2020-10-14T14:38:03.661Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7bba8d87fe65db9b20219f5d3ed6e7c5","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1986-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/aad07a47-f575-4696-9323-826722e44745"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720067542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-22T07:59:56Z","harvestDate":"2020-10-14T13:13:18.619Z","identifier":"oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/aad07a47-f575-4696-9323-826722e44745"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ikonen , K 1986 , ' Rakenneanalyysiohjelmat ' , Paper presented at Ydinvoimalaitosten turvallisuustutkimuksessa Suomessa käytetyt tietokoneohjelmat , Lappeenranta , Finland , 1/01/86 - 31/05/86 
."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/v1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"V1 Non-published/full refereed conference article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Rakenneanalyysiohjelmat"}]} +{"author":[{"fullname":"Home, Silja","name":"Silja","pid":[],"rank":1,"surname":"Home"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"dateofcollection":"2020-10-14T13:09:44.334Z","dateoftransformation":"2020-10-14T14:40:24.929Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::810ab952d864911e203aaa1a6350e297","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1993-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720074531,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-24T10:28:45Z","harvestDate":"2020-10-14T13:09:44.334Z","identifier":"oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5f4949a8-3510-4729-ae67-4a80bca40ce8"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Home , S 1993 , ' Oluen kemiaa ' , Dimensio , vol. 57 , no. 5 , pp. 
10-15 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Oluen kemiaa"}]} +{"author":[{"fullname":"Mattila, Sakari","name":"Sakari","pid":[],"rank":1,"surname":"Mattila"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"dateofcollection":"2020-10-14T13:09:40.962Z","dateoftransformation":"2020-10-14T14:46:53.279Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::8b1f6bc7e8243f4438937be16e76d8d0","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1991-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720088014,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-27T09:00:30Z","harvestDate":"2020-10-14T13:09:40.962Z","identifier":"oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/8d32d1cc-7dad-4b20-8974-723ab9e7b3f1"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Mattila , S 1991 , ' Puoliksi avointa ' , Tietotekniikka , vol. 37 , no. 
5 , 21 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puoliksi avointa"}]} +{"author":[{"fullname":"Valo, Matti","name":"Matti","pid":[],"rank":1,"surname":"Valo"},{"fullname":"Debarberis, L.","name":"L.","pid":[],"rank":2,"surname":"Debarberis"},{"fullname":"Kryukov, A.","name":"A.","pid":[],"rank":3,"surname":"Kryukov"},{"fullname":"Chernobaeva, A.","name":"A.","pid":[],"rank":4,"surname":"Chernobaeva"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"dateofcollection":"2020-10-14T13:04:23.268Z","dateoftransformation":"2020-10-14T16:25:31.649Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The dependence of the recovery of the transition temperature shift after annealing (475 °C, 100 h) on copper and phosphorus contents has been studied on irradiated reactor pressure vessel (RPV) materials. A set of model alloys with low nickel content, lower than 0.2 mass%, was used for the study. Copper and phosphorus contents were varied in a wide range: 0.005–0.99 and 0.002–0.039 mass%, respectively. Recovery efficiency has been estimated by the value of residual embrittlement after annealing, measured in terms of a shift in transition temperature (ΔTKres). A comparison of the results obtained on model alloys with data for VVER-440 RPV materials has also been carried out. Comparative analysis has confirmed the conclusion that ΔTKres is independent of phosphorus content while the effect of copper on ΔTKres is not significant for typical VVER-440 RPV materials with a typical range of Cu contents between 0.10 and 0.24 mass%. 
However, for model alloys with a wider range of copper content, copper mainly controls the value of ΔTKres."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::d5fd949409bb3843b43c4b4ef2b9777e","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/0ed9fc8b-a19d-488f-bbe9-b72d46e93cf5","https://doi.org/10.1016/j.ijpvp.2007.05.002"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720188893,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-02-05T01:22:31Z","harvestDate":"2020-10-14T13:04:23.268Z","identifier":"oai:cris.vtt.fi:publications/0ed9fc8b-a19d-488f-bbe9-b72d46e93cf5","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/0ed9fc8b-a19d-488f-bbe9-b72d46e93cf5"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"prove
nanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.ijpvp.2007.05.002"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Valo , M , Debarberis , L , Kryukov , A & Chernobaeva , A 2008 , ' Copper and phosphorus effect on residual embrittlement of irradiated model alloys and RPV steels after annealing ' , International Journal of Pressure Vessels and Piping , vol. 85 , no. 8 , pp. 575-579 . 
https://doi.org/10.1016/j.ijpvp.2007.05.002"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Annealing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Irradiation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Recovery"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transition 
temperature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Copper"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Phosphorus"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal 
article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Copper and phosphorus effect on residual embrittlement of irradiated model alloys and RPV steels after annealing"}]} +{"author":[{"fullname":"Viitaniemi, Pertti","name":"Pertti","pid":[],"rank":1,"surname":"Viitaniemi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"dateofcollection":"2020-10-14T13:09:13.348Z","dateoftransformation":"2020-10-14T16:58:47.202Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f1fa9e5d1ba36533cb0afb54538a6b09","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1988-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719769155,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-08-22T10:46:50Z","harvestDate":"2020-10-14T13:09:13.348Z","identifier":"oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d6ea13ad-3916-4541-80b6-0dbc01138a19"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Viitaniemi , P 1988 , ' Puun kierteisyys ja sen vaikutus sahatavaran laatuun ' , Sahamies , no. 9 , pp. 
260-264 ."}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/d1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"D1 Professional magazine article"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Puun kierteisyys ja sen vaikutus sahatavaran laatuun"}]} +{"author":[{"fullname":"Varjonen, Suvi","name":"Suvi","pid":[],"rank":1,"surname":"Varjonen"},{"fullname":"Laaksonen, Päivi","name":"Päivi","pid":[],"rank":2,"surname":"Laaksonen"},{"fullname":"Paananen, Arja","name":"Arja","pid":[],"rank":3,"surname":"Paananen"},{"fullname":"Valo, Hanna","name":"Hanna","pid":[],"rank":4,"surname":"Valo"},{"fullname":"Hähl, Hendrik","name":"Hendrik","pid":[],"rank":5,"surname":"Hähl"},{"fullname":"Laaksonen, Timo","name":"Timo","pid":[],"rank":6,"surname":"Laaksonen"},{"fullname":"Linder, 
Markus","name":"Markus","pid":[],"rank":7,"surname":"Linder"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"dateofcollection":"2020-10-14T13:00:59.594Z","dateoftransformation":"2020-10-14T17:14:32.702Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"One central problem for the function and manufacture of materials where performance relies on nanoscale structure is to control the compatibility and interactions of the building blocks. In natural materials, such as nacre, there are examples of multifunctional macromolecules that have combined binding affinities for different materials within the same molecule, thereby bridging these materials and acting as a molecular glue. Here, we describe the use of a designed multifunctional protein that is used for self-assembly of nanofibrillar cellulose. 
Recent advances in the production of cellulose nanofibrils have given inspiration for new uses of cellulosic materials. Cellulose nanofibrils have mechanical and structural features that open new possibilities for performance in composites and other nanoscale materials. Functionalisation was realised through a bi-functional fusion protein having both an ability to bind to cellulose and a second functionality of surface activity. The cellulose-binding function was obtained using cellulose-binding domains from cellulolytic enzymes and the surface activity through the use of a surface active protein called hydrophobin. Using the bi-functional protein, cellulose nanofibrils could be assembled into tightly packed thin films at the air/water interface and at the oil/water interface. It was shown that the combination of protein and cellulose nanofibrils resulted in a synergistic improvement in the formation and stability of oil-in-water emulsions resulting in emulsions that were stable for several months. 
The bi-functionality of the protein also allowed the binding of hydrophobic solid drug nanoparticles to cellulose nanofibrils and thereby improving their long-term stability under physiological conditions."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/5521b424-20a0-4f8c-8c70-505af50c5fef","https://doi.org/10.1039/C0SM01114B"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603719787721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-04-24T01:09:04Z","harvestDate":"2020-10-14T13:00:59.594Z","identifier":"oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/5521b424-20a0-4f8c-8c70-505af50c5fef"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1039/C0SM01114B"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Varjonen , S , Laaksonen , P , 
Paananen , A , Valo , H , Hähl , H , Laaksonen , T & Linder , M 2011 , ' Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins ' , Soft Matter , vol. 7 , no. 6 , pp. 2402-2411 . https://doi.org/10.1039/C0SM01114B"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/publicationtypes/a1"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"A1 Refereed journal article"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"/fi/minedu/virta/openaccess/0"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"0 
Not Open Access"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Self-assembly of cellulose nanofibrils by genetically engineered fusion proteins"}]} +{"author":[{"fullname":"Macharia, Bodia","name":"Bodia","pid":[],"rank":1,"surname":"Macharia"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Crosta, 
Suzanne"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"French"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"dateofcollection":"2020-09-30T05:06:26.491Z","dateoftransformation":"2020-10-09T05:07:41.329Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

Ce qui nous frappe en premier lieu dans le roman de Melchior Mbonimpa, Le Totem des Baranda, c'est l'efficacité de sa fonction didactique. Le livre est porteur d'un message, d'une vision sur l'histoire de l'Afrique des Grands Lacs qu'on rencontre rarement dans l'actualité ou l'histoire immédiate que nous livrent les médias.

Cette thèse se penchera sur un aspect de ce roman qui a particulièrement attiré notre attention: la représentation de la résistance à travers l'affirmation identitaire des personnages féminins. Dans notre analyse de ce thème, nous accordons une grande importance au contexte socioculturel et à l'imaginaire collectif qui nourrissent l'auteur et dans lesquels s'inscrivent ses personnages. L'étude soulignera la fonction sociale de l'écrivain et relèvera la contribution de la fiction à la culture africaine. Nous mettrons en évidence les positions idéologiques de l'auteur et le message de sensibilisation qu'il adresse à diverses catégories de lecteurs: ceux qui se trouvent en Afrique, les Africains de la diaspora, et les lecteurs non-Africains qui s'intéressent à son œuvre. Cette orientation idéologique affecte évidemment la manière dont s'exprime la résistance des pnncIpaux personnages féminins du roman. Elle détermine la place que cette fiction accorde à la femme, et qui correspond sans doute à la place que, selon les souhaits de l'auteur, la femme devrait occuper dans le contexte culturel africain. Notre étude insiste aussi sur le fait que dans ce roman, la modernité se nourrit de la culture traditionnelle et ne renie pas les valeurs du passé.

Ainsi, le parcours narratif montre que dans leur résistance, les héroïnes de la généalogie vivent à leur époque, mais chacune porte plus loin une mission transmise par les précédentes.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::4ebf4c756904fa8a37615e44b1200332","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-06-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/10605"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720429600,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:27Z","harvestDate":"2020-09-30T05:06:26.491Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/10605","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/10605"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"French and Francophone Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Pour une renaissance généalogique: résistance féminine chez Melchior Mbonimpa"}]} +{"author":[{"fullname":"Krause, Walter Thomas","name":"Walter Thomas","pid":[],"rank":1,"surname":"Krause"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Datars, 
W.R."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Physics"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"dateofcollection":"2020-09-30T05:06:17.843Z","dateoftransformation":"2020-10-09T05:07:43.404Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

The superconducting transition in the magnetic resistance of members of the family of bismuth based high temperature superconductors was investigated. Measurements were performed in magnetic fields up to 1.7 T. Small current densities ranging from 0.03 A/cm² to 3.0 A/cm² were applied. The resistivity of Bi₂Sr₂CaCu₂Ox single crystals was analyzed in terms of the thermally activated flux flow expression, ρ =ρ₀U/T exp(-U/T) where T is the temperature. It was found that the activation energy was given by Uα(Hsinθ)^(⁻α) where α≈1/3 and that the prefactor had the form, ρ₀Hsinθ, where H was the applied field and θ the angle of the field with respect to the CuO₂ planes. Results demonstrated that dissipation could be accounted for by the motion of two-dimensional vortices whose density is given by the field, Hsinθ, projected on the CuO₂ planes. Measurements of the resistivity and current dependent resistivity were performed with two Sn-doped and two Sb-doped polycrystalline Bi(1.7)Pb(0.3)Sr₂Ca₂Cu₃O(y) samples. Features in the temperature derivative of the resistivity curves were associated with the presence of a superconducting transition between superconducting grains, coupled by weak links with a distribution of critical currents and critical temperatures, and the superconducting transition within grains. The transition between grains was more strongly suppressed in temperature with the application of a magnetic field in samples with weaker coupling between grains. The presence of a transition in a magnetic field due to weak links between grains was verified at 77 K by the observation of a current dependent resistivity in a magnetic field. Measurements of a Bi₂Sr₂CaCu₂Ox diffusion grown thick film ring were done. The transverse voltage, the voltage at the centre of a 120 μm thick branch with respect to the centre of a 76 μm thick branch, was measured. 
A higher critical temperature from the presence of more texturing in the 76 μm branch as determined by separate resistivity, x-ray and scanning electron microscopy measurements was consistent with the measurement of a crossover from a negative to positive transverse voltage as the temperature of the sample went through its superconducting transition.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Doctor of Philosophy (PhD)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::57e9c0e7f2803e74fef30e18bab5e450","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1992-07-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/8621"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720431464,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:31:35Z","harvestDate":"2020-09-30T05:06:17.843Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/8621","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/8621"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_cl
assification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Physics"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Magnetic field resistivity of superconducting bismuth oxides"}]} +{"author":[{"fullname":"Sharp, Jeremy","name":"Jeremy","pid":[],"rank":1,"surname":"Sharp"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Savage, 
A."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"English"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"dateofcollection":"2020-09-30T05:06:27.768Z","dateoftransformation":"2020-10-09T05:07:56.297Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

This study examines Leonard Cohen's novel Beautiful Losers through the lenses of allegorical and authorial theories to appreciate how the novel uses allegorical techniques to code into symbolic terms an exploration of the polysemous nature of the word ''translation.'' The first chapter studies the stylistic and conceptual dimensions of allegory as a literary genre - as critics like Northrop Frye, Angus Fletchet, and Maureen Quilligan help to define it - while arguing that Cohen's novel is consciously allegorical, challenging readers to interpret what it \"means,\" or may mean. The second chapter performs an intensive re-reading of Beautiful Losers, examining how the novel uses complex systems of verbal play (particularly puns) to coordinate a reunification of various dichotomies historical \"reality\"/imaginative myth, secularity/spirituality, enslavement/sanctification, among others - employed throughout the text. The thesis concludes that the novel is perpetually playing with various types of translation (spiritual, linguistic, physical, and so forth), affirming the need for emotionally-charged, devotional forms of expression (like song and prayer) over more clinical attempts to reorder or recreate the world and its inhabitants. Ultimately, this discussion argues that an understanding of the allegorical dimensions of Beautiful Losers may illuminate how Cohen's other works (particularly his songs) may be studied as attempts to associate word with voice, to emphasize the process of expression (translation) rather than just the finished product.

"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Master of Arts (MA)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|73490d0e0f82::8ab8cb6d096b31eb67b4aaf43ca2d75f","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada Research"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1997-08-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::149c6590f8a06b46314eed77bfca693f","value":"Canada 
Research"},"instancetype":{"classid":"0044","classname":"Thesis","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://canadaresearch.mcmaster.ca/handle/11375/11059"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720442126,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcanadaresearch.mcmaster.ca%2Foai%2Frequest","datestamp":"2020-09-23T17:34:51Z","harvestDate":"2020-09-30T05:06:27.768Z","identifier":"oai:canadaresearch.mcmaster.ca:11375/11059","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:canadaresearch.mcmaster.ca:11375/11059"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and 
Literature"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"English Language and Literature"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"''What'd I Say?\": Beautiful Losers' Allegory of Translation"}]} +{"author":[{"fullname":"Almeida, Henrique","name":"Henrique","pid":[],"rank":1,"surname":"Almeida"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::f54ac940e822726208c7b2dd526e4849","value":" Revistas Científicas da Universidade Católica 
Portuguesa"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2004-01-01"},"dateofcollection":"2020-02-25T13:35:11.629Z","dateoftransformation":"2020-09-03T20:15:27.783Z","description":[],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|a587306331dd::0f52c20c167a5494879e0614164585f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::f54ac940e822726208c7b2dd526e4849","value":" Revistas Científicas da Universidade Católica Portuguesa"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2004-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::f54ac940e822726208c7b2dd526e4849","value":" Revistas Científicas da Universidade Católica 
Portuguesa"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by/4.0"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistas.ucp.pt/index.php/mathesis/article/view/3918"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0872-0215","issnPrinted":"0872-0215","name":"Máthesis","sp":"","vol":""},"language":{"classid":"por","classname":"Portuguese","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720006001,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistas.ucp.pt%2Findex.php%2Findex%2Foai","datestamp":"2020-02-20T15:56:11Z","harvestDate":"2020-02-25T13:35:11.629Z","identifier":"oai:ojs.revistas.ucp.pt:article/3918","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:ojs.revistas.ucp.pt:article/3918"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.34632/mathesis.2004.3918"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Universidade Católica Portuguesa"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Máthesis; n. 
13 (2004); 147-157"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0872-0215"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"10.34632/mathesis.2004.n13"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Aquilino Ribeiro e a crítica: reconstituição de um episódio polémico"}]} +{"author":[{"fullname":"Sintonen, Matti","name":"Matti","pid":[],"rank":1,"surname":"Sintonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-03-22"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:36.322Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Edellisissä muistikuvissa käsittelin akateemikkoja Eino Kaila, G. H. von Wright ja Oiva Ketonen. 
Nyt on vuorossa keväällä 2017 nimitetty Ilkka Niiniluoto."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::3afa40ff6212f33711b2b125a05da061","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/69941"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"36"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720418888,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-03-22T06:05:40Z","harvestDate":"2020-08-27T12:58:53.52Z","identifier":"oai:journal.fi:article/69941","metadataNamespace":""}},"originalId":["oai:journal.fi:article/69941"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Muistikuvia"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Täyskäsi – Ilkka Niiniluoto"}]} +{"author":[{"fullname":"Hetemäki, Ilari","name":"Ilari","pid":[],"rank":1,"surname":"Hetemäki"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:44.425Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-05-07"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/1850"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"27"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720425037,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T12:42:47.579Z","identifier":"oai:journal.fi:article/1850","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1850"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Lyhyesti"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Lyhyesti"}]} +{"author":[{"fullname":"Kulonen[-Korhonen], Ulla[-Maija]","pid":[],"rank":1}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:51.844Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvio Abondolo, Daniel Mario: Hungarian inflectional morphology Kielenainekset etuprosodinen (kieli: suomi, sivulla: 254) juuri (kieli: suomi, sivulla: 254) koodi (kieli: suomi, sivulla: 254) subjektikonjugaatio (kieli: suomi, sivulla: 255) takaprosodinen (kieli: suomi, sivulla: 254)"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::4c60c95783c4b240747e52990e709573","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1990-01-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/38371"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"253","iss":"2","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"253","vol":"94"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720430784,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:05Z","harvestDate":"2020-08-27T14:00:01.261Z","identifier":"oai:journal.fi:article/38371","metadataNamespace":""}},"originalId":["oai:journal.fi:article/38371"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","s
chemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuore näkemys unkarin taivutusmorfologiasta"}]} +{"author":[{"fullname":"Kerppola-Pesu, Jenni","name":"Jenni","pid":[],"rank":1,"surname":"Kerppola-Pesu"},{"fullname":"Halme, Nina","name":"Nina","pid":[],"rank":2,"surname":"Halme"},{"fullname":"Pietilä, Anna-Maija","name":"Anna-Maija","pid":[],"rank":3,"surname":"Pietilä"},{"fullname":"Perälä, 
Marja-Leena","name":"Marja-Leena","pid":[],"rank":4,"surname":"Perälä"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"dateofcollection":"","dateoftransformation":"2020-08-27T16:59:55.86Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vanhempien osallisuuden vahvistaminen sekä oikeus tulla kuulluksi ovat keskeisiä lasten palveluja ohjaavia periaatteita. Osallisuuden toteutumisessa on kuitenkin edelleen puutteita. Tämän tutkimuksen tarkoituksena oli selvittää päihdepalvelujen esimiesten käsityksiä siitä, miten päihdepalvelujen piirissä olevien vanhempien osallisuutta tuetaan. Osallisuuden tukemista arvioitiin työntekijöille modifiodulla Family Empowerment Scale -mittarilla (FES). Aineisto kerättiin päihdepalveluissa toimivilta esimiehiltä (n=372). Vastausprosentti oli 36. 
Taustamuuttujien perusteella määräytyvien vastaajaryhmien väliset erot analysoitiin riippumattomien otosten t-testillä sekä yksisuuntaisella varianssianalyysillä. Vanhempien osallisuuden tukeminen toteutui kohtuullisesti kaikissa toimipisteissä. Merkittävimmät kehittämiskohteet liittyivät perheiden riittämättömään tiedonsaantiin, heikkoihin palautteen antomahdollisuuksin, perheen ja henkilöstön välisen yhteistyön sekä vanhempien yhteiskunnallisten vaikutusmahdollisuuksien lisäämiseen. Vastaajien mukaan toimipisteen luonne oli yhteydessä osallisuuden tukemiseen päihdepalveluissa."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::5115f8bae044b12a72b0741673c66fcb","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-09-23"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/sla/article/view/47238"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss"
:"2","issnLinking":"","issnOnline":"","issnPrinted":"0355-5097","name":"Sosiaalilääketieteellinen Aikakauslehti","sp":"","vol":"51"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720434259,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2015-07-02T10:20:48Z","harvestDate":"2020-08-27T13:08:26.705Z","identifier":"oai:journal.fi:article/47238","metadataNamespace":""}},"originalId":["oai:journal.fi:article/47238"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Sosiaalilääketieteellinen 
Aikakauslehti"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Päihdepalvelujen piirissä olevien vanhempien osallisuuden tukeminen"}]} +{"author":[{"fullname":"Ritari, Katja","name":"Katja","pid":[],"rank":1,"surname":"Ritari"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:21.371Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::72070913a49aa49d3b5abc600f940893","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-12-05"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/scf/article/view/77169"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"2242-4261","issnPrinted":"1795-097X","name":"Studia Celtica 
Fennica","sp":"","vol":"14"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720459568,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2018-12-05T13:07:12Z","harvestDate":"2020-08-27T14:57:55.374Z","identifier":"oai:journal.fi:article/77169","metadataNamespace":""}},"originalId":["oai:journal.fi:article/77169"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Society for Celtic Studies SFKS ry."},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Studia Celtica 
Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Editorial"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial"}]} +{"author":[{"fullname":"Hoffrén, Jukka","name":"Jukka","pid":[],"rank":1,"surname":"Hoffrén"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:16.036Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Suomalaista hyvinvointiyhteiskuntaa pidettiin pitkään koko kansakuntaa yhdistävänä menestystarinana. Hyvinvoinnin huippukohta saavutettiin 1990-luvun alussa, ja sen jälkeen tarina on saanut entistä enemmän säröjä. Uusien mittareiden mukaan suomalaisten hyvinvointi on polkenut paikallaan tai jopa alentunut, vaikka ruttokansantuotteella (BKT) mitattu talouskasvu onkin saatu jatkumaan voimakkaana. Suurimpia syitä hyvinvoinnin laskuun ovat tuloerojen kasvaminen, talouden ympäristöön kasautuvan kuormituksen kasvu sekä luonnonvarojen kiihtyvä kulutus. 
Jälkiteolliseen yhteiskuntaan siirtyminen muuttaa tuotanto- ja elämäntapoja sekä rikkoo aiempia uskomuksia perinteisen talouskasvun siunauksellisuudesta yhteiskunnalliselle kehitykselle."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::7709c0dd641ca56ada58c9378e156648","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-03-22"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/5022"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"2","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":"30"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720758508,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:32:18.573Z","identifier":"oai:journal.fi:article/5022","metadataNamespace":""}},"originalId":["oai:journal.fi:article/5022"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suomalaisen hyvinvoinnin tarina"}]} +{"author":[{"fullname":"Tuominen, Kimmo","name":"Kimmo","pid":[],"rank":1,"surname":"Tuominen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-03"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:26.966Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::79ae333a14362379d40ce2444a68b51f","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1994-01-03"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/inf/article/view/1478"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"75","iss":"","issnLinking":"","issnOnline":"1797-9129","issnPrinted":"1797-9137","name":"Informaatiotutkimus","sp":"64","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720465388,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-08-27T13:11:48.927Z","identifier":"oai:journal.fi:article/1478","metadataNamespace":""}},"originalId":["oai:journal.fi:article/1478"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","sche
meid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Informaatiotutkimuksen yhdistys ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Informaatiotutkimus"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Artikkelit"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Yksilö tiedonhankkijana : Sense-making -teorian näkökulma tiedonhankintaprosessiin"}]} +{"author":[{"fullname":"Coleman, David","name":"David","pid":[],"rank":1,"surname":"Coleman"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:51.888Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Radical changes in living arrangements, in sexual habits and in the position of marriage in Europe have arisen, very unevenly, since the 1960s and in some regions in scarcely more than a decade. Cohabitation before marriage is normal – even universal – in many countries, with the popularity of marriage falling as its mean age rises to beyond the highest levels hitherto recorded. Divorce has been legalised and in most cases made readily accessible. However, although not as firm a demarcation as once believed, Hajnal’s line separating East and West has not yet been erased from the map of contemporary Europe. 
The article describes patterns and trends in partnership in Europe, including trends in marriage rates, divorce rates, the spread of cohabitation, LAT-relations and of births outside marriage, and tries to account for them."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::9fd9c0e6da2caa6ac1d655079482b428","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2013-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/fypr/article/view/40927"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"49","iss":"","issnLinking":"","issnOnline":"1796-6191","issnPrinted":"1796-6183","name":"Finnish Yearbook of Population 
Research","sp":"5","vol":"48"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720495570,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2020-06-24T08:28:25Z","harvestDate":"2020-08-27T15:11:11.742Z","identifier":"oai:journal.fi:article/40927","metadataNamespace":""}},"originalId":["oai:journal.fi:article/40927"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.23979/fypr.40927"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"the Family Federation of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Finnish Yearbook of Population 
Research"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Articles"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Partnership in Europe; its Variety, Trends and Dissolution"}]} +{"author":[{"fullname":"Siivonen, Katriina","name":"Katriina","pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"0000-0002-6302-1146"}],"rank":1,"surname":"Siivonen"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"dateofcollection":"","dateoftransformation":"2020-08-27T17:00:57.958Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Yrsa Lindqvist (ed.) 2008. Tradition och turism på Åland. Att använda kulturarven. (Tradition and Tourism in Åland.) Meddelanden från Folkkultursarkivet 21. Skrifter utgivna av Svenska litteratursällskapet i Finland 711. Helsingfors: Svenska litteratursällskapet i Finland. 240 pp. III. 
ISBN 978-951-583-167-5."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::a974b7e5144d11e293162c96ff33a4f0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-12-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/ethnolfenn/article/view/65995"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"101","iss":"","issnLinking":"","issnOnline":"2489-4982","issnPrinted":"0355-1776","name":"Ethnologia 
Fennica","sp":"100","vol":"36"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720502596,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2017-09-29T12:07:21Z","harvestDate":"2020-08-27T15:37:26.591Z","identifier":"oai:journal.fi:article/65995","metadataNamespace":""}},"originalId":["oai:journal.fi:article/65995"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnos ry"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ethnologia Fennica"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Book 
Reviews"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cultural Heritage in Use in Åland"}]} +{"author":[{"fullname":"Portin, Petter","name":"Petter","pid":[],"rank":1,"surname":"Portin"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:08:50.546Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kirja-arvostelu: Worldwatch-instituutti: Maailman tila 2008. Kestävä talous. Raportti kehityksestä kohti kestävää yhteiskuntaa. Suomentanut Jani Kaaro. 
Gaudeamus 2008."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::ad462fe2a7230b480118e7d8d37476d5","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2008-05-02"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/tt/article/view/490"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"1239-6540","issnPrinted":"0781-7916","name":"Tieteessä 
tapahtuu","sp":"","vol":""},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720348067,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-10-09T11:24:04Z","harvestDate":"2020-10-17T21:28:00.546Z","identifier":"oai:journal.fi:article/490","metadataNamespace":""}},"originalId":["oai:journal.fi:article/490"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteellisten seurain valtuuskunta"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tieteessä 
tapahtuu"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuus"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ilmastonmuutos – vakava markkinahäiriö"}]} +{"author":[{"fullname":"Kunnas, Niina","name":"Niina","pid":[],"rank":1,"surname":"Kunnas"}],"bestaccessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-10"},"dateofcollection":"","dateoftransformation":"2020-10-18T02:09:19.343Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Arvioitu teos Anneli Sarhimaa: Vaietut ja vaiennetut. Karjalankieliset karjalaiset Suomessa. Tietolipas 256. Helsinki: Suomalaisen Kirjallisuuden Seura 2017. 297 s. 
isbn 978-952-222-890-1."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|a89337edbe55::db6812b4988f5dcf9b65c31e4ece2d53","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-10-10"},"distributionlocation":"","hostedby":{"key":"10|openaire____::60c9c3845e65705771ad92eb451cfb2f","value":"Journal.fi"},"instancetype":{"classid":"0015","classname":"Review","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://journal.fi/virittaja/article/view/69682"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"3","issnLinking":"","issnOnline":"2242-8828","issnPrinted":"0042-6806","name":"Virittäjä","sp":"","vol":"122"},"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720388397,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fjournal.fi%2Findex%2Foai","datestamp":"2019-07-22T21:52:02Z","harvestDate":"2020-10-17T22:57:54.316Z","identifier":"oai:journal.fi:article/69682","metadataNamespace":""}},"originalId":["oai:journal.fi:article/69682"],"pid
":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.23982/vir.69682"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Kotikielen Seura"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Virittäjä"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Kirjallisuutta"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:reposito
ry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Tuoretta tietoa Suomessa puhuttavasta karjalan kielestä"}]} +{"author":[{"fullname":"Ben Romdhanne Bilel","pid":[],"rank":1},{"fullname":"Nikaein Navid","pid":[],"rank":2},{"fullname":"Knopp Raymond","pid":[],"rank":3},{"fullname":"Bonnet Christian","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-10-31"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.131Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::72f7ad968fa42cfbf0d3d7b245e43477","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital 
Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-10-31"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2069102"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720262482,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1145/2069087.2069102"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"OpenAirInterface large-scale wireless emulation platform and methodology"}]} +{"author":[{"fullname":"Gilles Barthe","pid":[],"rank":1},{"fullname":"Tamara Rezk","pid":[],"rank":2},{"fullname":"Alejandro Russo","pid":[],"rank":3},{"fullname":"Andrei Sabelfeld","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-07-30"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.575Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::cbd6814ee33b6357c7cea7c008a72b80","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital 
Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2010-07-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=1805977"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720264785,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1145/1805974.1805977"}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Security of multithreaded programs by compilation"}]} +{"author":[{"fullname":"Klaus Ostermann","pid":[],"rank":1},{"fullname":"Paolo Giarrusso","pid":[],"rank":2},{"fullname":"Christian Kästner","name":"Christian K.","pid":[],"rank":3,"surname":"Stner"},{"fullname":"Tillmann Rendel","pid":[],"rank":4}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"dateofcollection":"2015-01-20T00:00:00Z","dateoftransformation":"2016-03-12T12:49:39.741Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital 
Library"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2011-07-25"},"distributionlocation":"","hostedby":{"key":"10|openaire____::02b55e4f52388520bfe11f959f836e68","value":"ACM Digital Library"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dl.acm.org/citation.cfm?id=2032509"]}],"language":{"classid":"und","classname":"Undetermined","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720265881,"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":""}],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:mining:repository","classname":"Inferred by 
OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Revisiting information hiding"}]} +{"author":[{"fullname":"Hernandez Lopezomoza, Mario Andres","name":"Mario Andres","pid":[],"rank":1,"surname":"Hernandez Lopezomoza"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Institut Supérieur de l'Aéronautique et de l'Espace"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Biannic, Jean-Marc"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Jouhaud, 
Frank"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"dateofcollection":"2016-02-26T12:03:21.28Z","dateoftransformation":"2020-08-15T08:01:27.526Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Ce travail de thèse est consacré à l'extension de l'Inversion Dynamique non-linéaire (NDI-Nonlinear Dynamic Inversion) pour un ensemble plus grand de systèmes non-linéaires, tout en garantissant des conditions de stabilité suffisantes. La NDI a été étudiée dans le cas de diverses applications, y compris en aéronautique et en aérospatiale. Elle permet de calculer des lois de contrôle capables de linéariser et de découpler un modèle non-linéaire à tout point de fonctionnement de son enveloppe d'état. Cependant cette méthode est intrinsèquement non-robuste aux erreurs de modélisation et aux saturations en entrée. En outre, dans un contexte non-linéaire, l'obtention d'une garantie quantifiable du domaine de stabilité atteint reste à l'heure actuelle complexe. 
Contrairement aux approches classiques de la NDI, notre méthodologie peut être considérée comme un cadre de compensation non-linéaire généralisé qui permet d'intégrer les incertitudes et les saturations en entrée dans le processus de conception. En utilisant des stratégies de contrôle antiwindup, la loi de pilotage peut être calculée grâce à un simple processus en deux phases. Dans ce cadre de travail généralisé des transformations linéaires fractionnaires (LFT - Linear Fractional Transformations) de la boucle fermée non-linéaire peuvent être facilement déduites pour l'analyse de la stabilité robuste en utilisant des outils standards pour de systèmes linéaires. La méthode proposée est testée pour le pilotage d'un véhicule de rentrée atmosphérique de type aile delta lors de ses phases hypersonique, transsonique et subsonique. Pour cette thèse, un simulateur du vol incluant divers facteurs externes ainsi que des erreurs de modélisation a été développé dans Simulink.\n\nThis thesis work is devoted to extending Nonlinear Dynamic Inversion (NDI) for a large scale of\nnonlinear systems while guaranteeing sufficient stability conditions. NDI has been studied in a wide range of applications, including aeronautics and aerospace. It allows to compute nonlinear control laws able to decouple and linearize a model at any operating point of its state envelope. However, this method is inherently non-robust to modelling errors and input saturations. Moreover, obtaining a quantifiable guarantee of the attained stability domain in a nonlinear control context is not a very straightforward task. Unlike standard NDI approaches, our methodology can be viewed as a generalized nonlinear compensation framework which allows to incorporate uncertainties and input saturations in the design process. Paralleling anti-windup strategies, the controller can be computed through a single multichannel optimization problem or through a simple two-step process. 
Within this framework, linear fractional transformations of the nonlinear closed-loop can be easily derived for robust stability analysis using standard tools for linear systems. The proposed method is tested for the flight control of a delta wing type reentry vehicle at hypersonic, transonic and subsonic phases of the atmospheric reentry. For this thesis work, a Flight Mechanics simulator including diverse external factors and modelling errors was developed in Simulink."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|artemis___fr::630e47d8b572e3df0e91327d6d8f036d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-09-21"},"distributionlocation":"","hostedby":{"key":"10|driver______::66c20c26ac26136628f5207819ae1abc","value":"Archives des thèses et mémoires de l’ISAE (ArTeMIS)"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://depozit.isae.fr/theses/2012/2012_Hernandez_Lopezomoza_Mario_Andres.pdf"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720142745,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"http://ori-oai.isae.fr/ori-oai-repository/OAIHandler","datestamp":"2013-06-13","harvestDate":"2016-02-26T12:03:21.28Z","identifier":"oai:isae-repo.fr:isae-371","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:isae-repo.fr:isae-371"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Compensation non-linéaire généralisée"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Inversion dynamic 
non-linéaire"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande anti-windup"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande robuste"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Transformations linéaires fractionnaires"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Commande H-infinie 
non-lisse"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Generalized nonlinear compensation"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Nonlinear dynamic inversion"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Anti-windup 
control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Robust control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Linear fractional transformation, Nonsmooth H-infinity control"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"Atmospheric 
reentry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:result_subject","schemename":"dnet:result_subject"},"value":"629.8"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Cadre de travail généralisé de compensation non-linéaire robuste : application à la rentrée atmosphérique"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A generalized framework for robust nonlinear compensation : application to an atmospheric reentry control problem"}]} +{"author":[{"fullname":"Manea, Florin","name":"Florin","pid":[],"rank":1,"surname":"Manea"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-10-01"},"dateofcollection":"2017-09-25T17:25:29.95Z","dateoftransformation":"2019-01-23T10:12:38.35Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::020ca6ee0ae16e1e9c1405207087a671","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2012-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/j.tcs.2012.06.029"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"79","iss":"","issnLinking":"","issnOnline":"","issnPrinted":"0304-3975","name":"Theoretical Computer Science","sp":"65","vol":"456"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720301847,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:25:29.95Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.tcs.2012.06.029"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Theoretical Computer Science"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Computer 
Science(all)"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Complexity results for deciding Networks of Evolutionary Processors"}]} +{"author":[{"fullname":"Yanlong, Yin","name":"Yin","pid":[],"rank":1,"surname":"Yanlong"},{"fullname":"Mingkai, Yao","name":"Yao","pid":[],"rank":2,"surname":"Mingkai"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-10-01"},"dateofcollection":"2017-09-25T17:16:18.455Z","dateoftransformation":"2019-01-23T10:13:00.54Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::02715dd9fc09b87966f3bad613aec7f9","instance":[{"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-10-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/j.jacc.2014.06.769"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"16","issnLinking":"","issnOnline":"","issnPrinted":"0735-1097","name":"Journal of the American College of 
Cardiology","sp":"","vol":"64"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720310721,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:16:18.455Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.jacc.2014.06.769"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Cardiology and Cardiovascular 
Medicine"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"GW25-e1474 Radiofrequency catheter ablation via radial artery approach for left-sided atrioventricuar accessory pathways"}]} +{"author":[{"fullname":"Reed, Jonathan C","name":"Jonathan C.","pid":[],"rank":1,"surname":"Reed"},{"fullname":"Kasschau, Kristin D","name":"Kristin D.","pid":[],"rank":2,"surname":"Kasschau"},{"fullname":"Prokhnevsky, Alexey I","name":"Alexey I.","pid":[],"rank":3,"surname":"Prokhnevsky"},{"fullname":"Gopinath, Kodetham","name":"Kodetham","pid":[],"rank":4,"surname":"Gopinath"},{"fullname":"Pogue, Gregory P","name":"Gregory P.","pid":[],"rank":5,"surname":"Pogue"},{"fullname":"Carrington, James C","name":"James C.","pid":[],"rank":6,"surname":"Carrington"},{"fullname":"Dolja, Valerian V","name":"Valerian V.","pid":[],"rank":7,"surname":"Dolja"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-02-01"},"dateofcollection":"2017-09-25T17:22:28.969Z","dateoftransformation":"2019-01-23T10:13:50.503Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::035f21f0844d85933ada45818ab13f5d","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2003-02-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/s0042-6822(02)00051-x"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"209","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0042-6822","name":"Virology","sp":"203","vol":"306"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720329442,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:22:28.969Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/s0042-6822(02)00051-x"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Virology"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Suppressor of RNA silencing encoded by Beet yellows virus"}]} +{"author":[{"fullname":"Miller, R.K","name":"R. 
K.","pid":[],"rank":1,"surname":"Miller"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1968-05-01"},"dateofcollection":"2017-09-25T17:23:20.638Z","dateoftransformation":"2019-01-23T10:16:13.468Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|base_oa_____::06505050e503a2e7f9167ba704fbeaac","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access Aggregator)"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1968-05-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::df45502607927471ecf8a6ae83683ff5","value":"BASE (Open Access 
Aggregator)"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.elsevier.com/open-access/userlicense/1.0/"},"refereed":{"classid":"0000","classname":"UNKNOWN","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["http://dx.doi.org/10.1016/0022-247x(68)90176-5"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"340","iss":"2","issnLinking":"","issnOnline":"","issnPrinted":"0022-247X","name":"Journal of Mathematical Analysis and Applications","sp":"319","vol":"22"},"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1603720390392,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"file:///mnt/downloaded_dumps/mdstore/base/base_winnower_elsevier.xml.gz","datestamp":"","harvestDate":"2017-09-25T17:23:20.638Z","identifier":"","metadataNamespace":""}},"originalId":[""],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object 
Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0022-247x(68)90176-5"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Elsevier BV"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Applied Mathematics"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Analysis"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main 
title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"On Volterra integral equations with nonnegative integrable resolvents"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json new file mode 100644 index 000000000..dab359336 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json @@ -0,0 +1,30 @@ +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"20|doajarticles::b9e2aa37baa5c9b72d7dee964a442bda"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"20|doajarticles::03748bcb5d754c951efec9700e18a56d"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"20|doajarticles::50cb15ff7a6a3f8531f063770179e346"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"20|doajarticles::ffc1811633b3222e4764c7b0517f83e8"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"20|dedup_wf_001::4e6c928fef9851b37ec73f4f6daca35b"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"20|dedup_wf_001::846b777af165fef7c904a81712a83b66"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::fbf7592ddbf2ad3cc0ed70c0f2e1d67c","subRelType":"provision","target":"20|dedup_wf_001::1b965e2c0c53e5526d269d63bcfa0ae6"} +{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::fd4c399077127f0ba09b5205e2b78406","subRelType":"provision","target":"20|doajarticles::1cae0b82b56ccd97c2db1f698def7074"} +{"collectedfrom":[{"key":"10|infrastruct_::f66f1bd369679b5b077dcdf006089556","value":"OpenAIRE"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|openaire____::8f991165fae922e29ad55d592f568464","subRelType":"provision","target":"20|openaire____::ec653e804967133b9436fdd30d3ff51d"} 
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::16d11e9595188dbad0418a85f0351aba","subRelType":"provision","target":"20|opendoar____::041abd8c990fc531ab9bd2674a0e2725"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::46d3f6029f6170ebccb28945964d09bf","subRelType":"provision","target":"20|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::7501e5d4da87ac39d782741cd794002d","subRelType":"provision","target":"20|dedup_wf_001::04e2c34ef4daa411ff2497afc807b612"} 
+{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::75b9b6dc7fe44437c6e0a69fd863dbab","subRelType":"provision","target":"20|dedup_wf_001::ad30fbc9b3b6f5370e59e58c456b7e19"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::d35b05a832e2bb91f110d54e34e2da79","subRelType":"provision","target":"20|opendoar____::589618708434cfc5b830601ac4b339ee"} +{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|opendoar____::fc2e6a440b94f64831840137698021e1","subRelType":"provision","target":"20|opendoar____::a82c7e358792e0018235b7f196fec4ed"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::128284e3cc130ee9c68c6955bf4d1385","subRelType":"provision","target":"20|dedup_wf_001::a9651cfb29790a31c580ccb5bffa9349"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::48b3cc4b8c5951621730829c60b1c205","subRelType":"provision","target":"20|dedup_wf_001::5bc6fca7649010470f1cc11f6675ffb3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::8e7a079ee6d2d4933db9b898c789c2f8","subRelType":"provision","target":"20|dedup_wf_001::0b0ae5b38aedc082a8b089abdf3b752f"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::b26c86eba2b3ad8b242a93b581e6ec8e","subRelType":"provision","target":"20|re3data_____::e841a40265d8d0a6739ac71f56328da3"} +{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|re3data_____::cafe7980294aa5f935f433e7c8aab844","subRelType":"provision","target":"20|dedup_wf_001::2806db65ba8029ee196679cad067eff2"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"} +{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1603564783812,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","target":"50|dedup_wf_001::02859c30f6c8bfbdd8c427068a6ec684"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::645123c3fe7bab557c36f0f9bb02a4cd"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::8bcd11b6bffc76f4d4f88d7a6728d614","subRelType":"participation","target":"40|aka_________::fecf4f862a6b40dd2ccb1abc8fed5bc5"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::68ce7288b1b036f73a1ff951c6524eba"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::b733217d1cd609001dd3c75af419d872"} +{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of 
Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::a30b9a45766293af38951d767e77a471","subRelType":"participation","target":"40|aka_________::c33dee8231ad0374caf93e52c5a473e5"} \ No newline at end of file From bde6d337dd9178c51a9fdd5028b3d42683bfef52 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:42:01 +0100 Subject: [PATCH 059/108] test classes for dump of results related to funders --- .../ResultLinkedToProjectTest.java | 129 ++++++++++++++++ .../dump/funderresult/SplitPerFunderTest.java | 140 +++++++++++++++++- 2 files changed, 268 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 3696db7f8..b7ee5b122 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -1,4 +1,133 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresult; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; 
+import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + public class ResultLinkedToProjectTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class); + + private static HashMap map = new HashMap<>(); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testNoMatch() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") + .getPath(); + + final String 
relationPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json") + .getPath(); + + SparkResultLinkedToProject.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/preparedInfo", + "-sourcePath", sourcePath, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-relationPath", relationPath + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedInfo") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Result.class)); + + Assertions.assertEquals(0, verificationDataset.count()); + + } + + @Test + public void testMatchOne() throws Exception { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") + .getPath(); + + final String relationPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json") + .getPath(); + + SparkResultLinkedToProject.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/preparedInfo", + "-sourcePath", sourcePath, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-relationPath", relationPath + + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedInfo") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Publication.class)); + + Assertions.assertEquals(1, verificationDataset.count()); + + } + } diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 8e49b9fc6..3381dabe1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -1,4 +1,142 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresult; -public class DumpResultPerFunderTest { +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; +import eu.dnetlib.dhp.oa.graph.dump.SplitForCommunityTest; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit; +import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults; +import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +public class SplitPerFunderTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(SplitPerFunderTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(SplitPerFunderTest.class.getSimpleName()); + log.info("using work dir {}", 
workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(SplitPerFunderTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(SplitPerFunderTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void test1() throws Exception { + + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") + .getPath(); + + + SparkDumpFunderResults.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/split", + "-sourcePath", sourcePath, + "-relationPath", sourcePath + + }); + + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + // FP7 3 + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/split/EC_FP7") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + Assertions.assertEquals(3, verificationDataset.count()); + + Assertions + .assertEquals( + 1, verificationDataset.filter("id = '50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776'").count()); + + + // CIHR 2 + tmp = sc + .textFile(workingDir.toString() + "/split/CIHR") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(2, tmp.count()); + + //NWO 1 + tmp = sc + .textFile(workingDir.toString() + "/split/NWO") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); 
+ Assertions.assertEquals(1, tmp.count()); + + //NIH 3 + tmp = sc + .textFile(workingDir.toString() + "/split/NIH") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(2, tmp.count()); + + //NSF 1 + tmp = sc + .textFile(workingDir.toString() + "/split/NSF") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); + + //SNSF 1 + tmp = sc + .textFile(workingDir.toString() + "/split/SNSF") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); + + //NHMRC 1 + tmp = sc + .textFile(workingDir.toString() + "/split/NHMRC") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); + + //H2020 3 + tmp = sc + .textFile(workingDir.toString() + "/split/EC_H2020") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(3, tmp.count()); + + } } From 21ce175d176f76283cf4f0754842bd967e8f5d68 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:42:31 +0100 Subject: [PATCH 060/108] added FilterFunction specification if filter operation --- .../eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 6be1befce..55f075e95 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -9,6 +9,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; import 
org.apache.spark.sql.Dataset; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; @@ -54,7 +55,7 @@ public class CommunitySplit implements Serializable { private static void printResult(String c, Dataset result, String outputPath) { Dataset community_products = result - .filter(r -> containsCommunity(r, c)); + .filter((FilterFunction) r -> containsCommunity(r, c)); try { community_products.first(); From 305e3d0c9c13b74d99f4dfcc547dae5df68af826 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:43:41 +0100 Subject: [PATCH 061/108] added resource file for relation with relClass = isProducedBy --- .../dhp/oa/graph/dump/funderresource/extendeddump/relation | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation index 3cb855c1a..3ed91f451 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/relation @@ -1,7 +1,7 @@ {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|cihr________::1e5e62235d094afd01cd56e65112fc63"} 
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"40|nwo_________::dc69ada721bf21ed51055b6421850d73"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"40|nih_________::031bb5f2f70239b3210eda38b2079f67"} -{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"40|nsf_________::03748bcb5d754c951efec9700e18a56d"} 
+{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::7a71f278237d1ab35088efda03fa007a","subRelType":"provision","target":"40|nsf_________::03748bcb5d754c951efec9700e18a56d"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::8b75543067b50076e70764917e188178","subRelType":"provision","target":"40|snsf________::50cb15ff7a6a3f8531f063770179e346"} {"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"} 
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"} From e7e418e444cab97084faa167c0fe3057b09a9de0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:44:10 +0100 Subject: [PATCH 062/108] added decision node to verify if to upload in Zenodo --- .../dump/funderresults/oozie_app/workflow.xml | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml index d258d4dd4..d310390b7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml @@ -1,6 +1,11 @@ + + upload + false + true to upload the dump for the funders in Zenodo + sourcePath the source path @@ -116,7 +121,7 @@ - +
@@ -515,12 +520,31 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/ext - --outputPath${outputPath} + --outputPath${workingDir}/resultperfunder --relationPath${sourcePath} - + + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar + --hdfsPath${outputPath} + --nameNode${nameNode} + + --sourcePath/user/miriam.baglioni/graph_dump_whole_production_funder_results + + + + + + + + ${wf:conf('upload') eq true} + + + From 87a9f616ae3d38062c136d1533c258dbbb68ac4f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:45:41 +0100 Subject: [PATCH 063/108] refactoring and addition of the funder nsp first part as nome for the dump insteasd of the whole nsp --- .../funderresults/SparkDumpFunderResults.java | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 232459c19..e2d8d7cb1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; +import eu.dnetlib.dhp.oa.graph.dump.Constants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -72,7 +73,7 @@ public class SparkDumpFunderResults implements Serializable { Dataset relation = Utils .readPath(spark, relationPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and relClass = 'isProducedBy'"); + .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + 
Constants.RESULT_PROJECT_IS_PRODUCED_BY.toLowerCase()+ "'"); Dataset result = Utils .readPath(spark, inputPath + "/publication", CommunityResult.class) @@ -86,13 +87,22 @@ public class SparkDumpFunderResults implements Serializable { .distinct() .collectAsList(); -// Dataset results = result -// .joinWith(relation, result.col("id").equalTo(relation.col("target")), "inner") -// .map((MapFunction, CommunityResult>) value -> { -// return value._1(); -// }, Encoders.bean(CommunityResult.class)); - funderList.forEach(funder -> writeFunderResult(funder, result, outputPath)); + funderList.forEach(funder -> { + String fundernsp = funder.substring(3); + String funderdump; + if (fundernsp.startsWith("corda")){ + funderdump = "EC_"; + if(fundernsp.endsWith("h2020")){ + funderdump += "H2020"; + }else{ + funderdump += "FP7"; + } + }else{ + funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase(); + } + writeFunderResult(funder, result, outputPath + "/" + funderdump); + }); } @@ -113,7 +123,7 @@ public class SparkDumpFunderResults implements Serializable { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "/" + funder); + .json(outputPath); } } From e758d5d9b4f6253a1b5deb65a35689705ffff2ce Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 13:46:39 +0100 Subject: [PATCH 064/108] refactoring --- .../dump/funderresults/SparkResultLinkedToProject.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index e5d7e2254..8c91fbf1f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -6,9 +6,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; +import eu.dnetlib.dhp.oa.graph.dump.Constants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; @@ -64,19 +64,19 @@ public class SparkResultLinkedToProject implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - writeResultsLikedToProjects(spark, inputClazz, inputPath, outputPath, relationPath); + writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, relationPath); }); } - private static void writeResultsLikedToProjects(SparkSession spark, Class inputClazz, - String inputPath, String outputPath, String relationPath) { + private static void writeResultsLinkedToProjects(SparkSession spark, Class inputClazz, + String inputPath, String outputPath, String relationPath) { Dataset results = Utils .readPath(spark, inputPath, inputClazz) .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); Dataset relations = Utils .readPath(spark, relationPath, Relation.class) - .filter("dataInfo.deletedbyinference = false and lower(relClass) = 'isproducedby'"); + .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + Constants.RESULT_PROJECT_IS_PRODUCED_BY.toLowerCase() + "'"); relations .joinWith( From 1f130cdf92ea2335824d77d81888e00aedbdd24e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 14:04:26 +0100 Subject: [PATCH 065/108] changed the relation (produces -> isProducedBy) due to the change in the code --- .../graph/dump/resultProject/match/relation | 16 
+-- .../dump/resultProject/match_one/relation | 16 +-- .../dump/resultProject/no_match/relation | 119 +++--------------- 3 files changed, 35 insertions(+), 116 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match/relation index e23eebcc6..b1c793e2a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match/relation +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match/relation @@ -1,8 +1,8 @@ -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","target":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::03376222b28a3aebf2730ac514818d04","subRelType":"outcome","target":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","target":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","target":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","target":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","target":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","target":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","target":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} \ No newline at end of file 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::03376222b28a3aebf2730ac514818d04","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one/relation index 7d6e3af39..8a62409b7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one/relation +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one/relation @@ -1,8 +1,8 @@ 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","target":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","target":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","target":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","target":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","target":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","target":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","target":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","target":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} \ No newline at end of file +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match/relation b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match/relation index 2a14c05f4..b7956b405 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match/relation +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match/relation @@ -1,100 +1,19 @@ -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","target":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","target":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","target":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","target":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","target":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","target":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","target":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","target":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395829268,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::8fb7796d545978ab9e38cf5cc38e4bb7","subRelType":"outcome","target":"50|dedup_wf_001::6681aacc33bd9eafd8993a68929e36a6"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395829268,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::8fb7796d545978ab9e38cf5cc38e4bb7","subRelType":"outcome","target":"50|dedup_wf_001::6681aacc33bd9eafd8993a68929e36a6"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831579,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::b432ea401d98a6e72c3dd17c316a3a0a","subRelType":"outcome","target":"50|dedup_wf_001::86228f7b229d657dff902b4f08154161"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831579,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::b432ea401d98a6e72c3dd17c316a3a0a","subRelType":"outcome","target":"50|dedup_wf_001::86228f7b229d657dff902b4f08154161"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828923,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::dbd4dcfb0bf316a796a5512f7aeeccf5","subRelType":"outcome","target":"50|dedup_wf_001::03b35e23ff5f1dbc5f95db4505003244"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828923,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|aka_________::dbd4dcfb0bf316a796a5512f7aeeccf5","subRelType":"outcome","target":"50|dedup_wf_001::03b35e23ff5f1dbc5f95db4505003244"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395830521,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::01703d216eb2d01f73e9f476302cd341","subRelType":"outcome","target":"50|dedup_wf_001::d4b9ca4bf332403c15abb47d21963ddc"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395830521,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::01703d216eb2d01f73e9f476302cd341","subRelType":"outcome","target":"50|dedup_wf_001::d4b9ca4bf332403c15abb47d21963ddc"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395828850,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::0229a330cb3e72e8f976a93bef7e0982","subRelType":"outcome","target":"50|od________18::0fa23557cd83522c7db8ccfefad1f12e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:project:semrel","classname":"Propagation of result to project through semantic relation","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"trust":"0.85"},"lastupdatetimestamp":null,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::7f0f296b5e4ec1a1d61cfe4da63406c7","subRelType":"outcome","target":"50|dedup_wf_001::595ab3bf9261fbd616fb0240fb78add2"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837446,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::a7322657c4ea4c9a3dc7cb77deb51620","subRelType":"outcome","target":"50|od_______166::953fd0631804ebda31b415fea855d570"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837127,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::aa9c752447e0e51abe37bb4eaf241aab","subRelType":"outcome","target":"50|od_______165::1cefb61fab1ae3feaa06534f63fdf296"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829563,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::d63a83f19b39261984f4d86cb6675dcf","subRelType":"outcome","target":"50|dedup_wf_001::03f8d5869d44dbceb8ffdeb4cda9b26d"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829250,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::d90f47fc8f5804b3f4b5248e76b70ca1","subRelType":"outcome","target":"50|dedup_wf_001::6635213fb0d093f0e57fedb90fdbdd2d"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829250,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|anr_________::d90f47fc8f5804b3f4b5248e76b70ca1","subRelType":"outcome","target":"50|dedup_wf_001::6635213fb0d093f0e57fedb90fdbdd2d"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837587,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::239d347dc833846f10f86372c317f1e2","subRelType":"outcome","target":"50|dedup_wf_001::b09492f75c77b4eb02592fc18714ba70"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837587,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::239d347dc833846f10f86372c317f1e2","subRelType":"outcome","target":"50|dedup_wf_001::b09492f75c77b4eb02592fc18714ba70"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829283,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::2f09192c987e2760ce4370596f07b9e1","subRelType":"outcome","target":"50|dedup_wf_001::66cdebe05b0fd513c341e035a527747d"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829283,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::2f09192c987e2760ce4370596f07b9e1","subRelType":"outcome","target":"50|dedup_wf_001::66cdebe05b0fd513c341e035a527747d"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837233,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::373f3b8b4f41532f7db8aa38ab7cd08b","subRelType":"outcome","target":"50|dedup_wf_001::53539b755aee01ccddf01fba0a49dc18"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837233,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::373f3b8b4f41532f7db8aa38ab7cd08b","subRelType":"outcome","target":"50|dedup_wf_001::53539b755aee01ccddf01fba0a49dc18"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395831736,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::59e8cbab65f186502666a9ff1a1ce4f9","subRelType":"outcome","target":"50|dedup_wf_001::879efe151088c17ac5ebe22d33e93927"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395831736,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::59e8cbab65f186502666a9ff1a1ce4f9","subRelType":"outcome","target":"50|dedup_wf_001::879efe151088c17ac5ebe22d33e93927"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829058,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::67abe0bdf1d297546317efca7e8da326","subRelType":"outcome","target":"50|dedup_wf_001::4700390fca078f30ee91913f9c100d9c"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395828858,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::6fbeacae3e2d49442c396aab11db0db0","subRelType":"outcome","target":"50|dedup_wf_001::bf2abb82d0dcef9731fd185cc2b5051a"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395828858,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::6fbeacae3e2d49442c396aab11db0db0","subRelType":"outcome","target":"50|dedup_wf_001::bf2abb82d0dcef9731fd185cc2b5051a"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395833571,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::79ac06898eefadb3536c7a80955f78fb","subRelType":"outcome","target":"50|dedup_wf_001::38f2848b3df19d244cf35b17c8f3853f"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395833571,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::79ac06898eefadb3536c7a80955f78fb","subRelType":"outcome","target":"50|dedup_wf_001::38f2848b3df19d244cf35b17c8f3853f"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829922,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::bb93b41daff6e47c004e20152fb41ea7","subRelType":"outcome","target":"50|dedup_wf_001::721ff2a1e0d1229d33391abc4c143cd5"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829922,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::bb93b41daff6e47c004e20152fb41ea7","subRelType":"outcome","target":"50|dedup_wf_001::721ff2a1e0d1229d33391abc4c143cd5"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395831083,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::dac883cd0b2debb04f442e81ff3cc51a","subRelType":"outcome","target":"50|dedup_wf_001::26790120bd5562974754b581a0db8d07"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395831083,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::dac883cd0b2debb04f442e81ff3cc51a","subRelType":"outcome","target":"50|dedup_wf_001::26790120bd5562974754b581a0db8d07"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395833709,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::e032e80ee457a2dbe97e203556c0e951","subRelType":"outcome","target":"50|dedup_wf_001::39fead2d48bf47d85109268bdee675e8"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395833709,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::e032e80ee457a2dbe97e203556c0e951","subRelType":"outcome","target":"50|dedup_wf_001::39fead2d48bf47d85109268bdee675e8"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837304,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::ede88552e4b9ebe54096182d726a7b9c","subRelType":"outcome","target":"50|dedup_wf_001::5408c54c9ae62501eb222a5b409f1e97"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837304,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::ede88552e4b9ebe54096182d726a7b9c","subRelType":"outcome","target":"50|dedup_wf_001::5408c54c9ae62501eb222a5b409f1e97"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837176,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::fffb682484230acc97dd7f17c5fbceb8","subRelType":"outcome","target":"50|dedup_wf_001::acb8c02749849dafabb4018c07a2b52f"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837176,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|arc_________::fffb682484230acc97dd7f17c5fbceb8","subRelType":"outcome","target":"50|dedup_wf_001::acb8c02749849dafabb4018c07a2b52f"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395828710,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::01a06bbd4572761039c8e44d65ede376"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395828710,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::01a06bbd4572761039c8e44d65ede376"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:project:semrel","classname":"Propagation of result to project through semantic relation","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"trust":"0.85"},"lastupdatetimestamp":null,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::12b59a56d4d4ffb1a39cdc2890215ede"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395832239,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::30f31e1177d90f36a1eef14fe1b623b7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395832239,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::30f31e1177d90f36a1eef14fe1b623b7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395834545,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::3f7c8689bb105e5eb4fde42bfbc16f58"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395834545,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::3f7c8689bb105e5eb4fde42bfbc16f58"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395836294,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::4b9f805f45a9f10e0f165081df59c809"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395836294,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::4b9f805f45a9f10e0f165081df59c809"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:project:semrel","classname":"Propagation of result to project through semantic 
relation","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"trust":"0.85"},"lastupdatetimestamp":null,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::4c064368514b9a14115eacc7b073f313"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395838085,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::5a98e88d186ff3adb6bca263b894d80e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395838085,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::5a98e88d186ff3adb6bca263b894d80e"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829002,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::630b198ba350a7f97b43459adef9842d"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829002,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::630b198ba350a7f97b43459adef9842d"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395832942,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::915ffc10b73994d0d56dce5210607105"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395832942,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::915ffc10b73994d0d56dce5210607105"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829757,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::c86ff4584d9966b8bff4bc27f69089e7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395829757,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::c86ff4584d9966b8bff4bc27f69089e7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395830558,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::d55ba97fec119365a0ee5fd04f5e883f"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395830558,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::d55ba97fec119365a0ee5fd04f5e883f"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395834856,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::f95d2b27b7f5e999128fec774274f91a"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395834856,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|cihr________::1e5e62235d094afd01cd56e65112fc63","subRelType":"outcome","target":"50|dedup_wf_001::f95d2b27b7f5e999128fec774274f91a"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837300,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|conicytf____::6c5ec84f44c8df4320511a2120b13d8e","subRelType":"outcome","target":"50|od_______166::045efaf4fb9c583c4596cdd35ee2fc16"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395834956,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|conicytf____::f3cec35cb438d4fda883b61f7f2dbeaa","subRelType":"outcome","target":"50|dedup_wf_001::42f51a68fbb864f9782b693ab74cd012"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395834956,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|conicytf____::f3cec35cb438d4fda883b61f7f2dbeaa","subRelType":"outcome","target":"50|dedup_wf_001::42f51a68fbb864f9782b693ab74cd012"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.897"},"lastupdatetimestamp":1590395838198,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0604445004cc1b9633b92d07a972800b","subRelType":"outcome","target":"50|dedup_wf_001::5b217df379907c8eb5e21be4282019fd"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.897"},"lastupdatetimestamp":1590395838198,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0604445004cc1b9633b92d07a972800b","subRelType":"outcome","target":"50|dedup_wf_001::5b217df379907c8eb5e21be4282019fd"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282645875,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::067aaaf35be5e6091bcd587a971e1776","subRelType":"outcome","target":"50|erc_________::198b7aa08fb434f6b30fa05076a6548b"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282774850,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::08ff5c51783c0577beff317dc60cf9d5","subRelType":"outcome","target":"50|dedup_wf_001::f81cdf89f1cebad365a79630a3d14038"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282727738,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::08ff5c51783c0577beff317dc60cf9d5","subRelType":"outcome","target":"50|dedup_wf_001::f81cdf89f1cebad365a79630a3d14038"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282761144,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0aabaab7bbd3bece6b42bb90cc9460e1","subRelType":"outcome","target":"50|dedup_wf_001::1a77e81656e7a2902afad56035caf299"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6977"},"lastupdatetimestamp":1590395831731,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0c0f27a478b84319d2b222221c8f9ee8","subRelType":"outcome","target":"50|dedup_wf_001::e304a313e9f48c2dc47cb8b108ddc526"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591283071743,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0c0f27a478b84319d2b222221c8f9ee8","subRelType":"outcome","target":"50|dedup_wf_001::e304a313e9f48c2dc47cb8b108ddc526"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6977"},"lastupdatetimestamp":1590395831731,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0c0f27a478b84319d2b222221c8f9ee8","subRelType":"outcome","target":"50|dedup_wf_001::e304a313e9f48c2dc47cb8b108ddc526"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282741565,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::0d6b0eb936b499f984a5712aa926fdf5","subRelType":"outcome","target":"50|erc_________::803dd2397322a44b55df125d7f701365"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282849150,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::1285dc778cf68267f410d7f318e0453f","subRelType":"outcome","target":"50|fp7_env_____::62c2651f58f1f1ce7b8ffe1cc475d5e7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282701346,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::18c22e20f080a7f5f4c8f8272432f6cb","subRelType":"outcome","target":"50|dedup_wf_001::ef17ed0434d26d25311f99d73e1fecc9"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282770162,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2287131790d1bdb52e61a72b6ba46c8b","subRelType":"outcome","target":"50|od_______935::398ef99c7c79cf8888e37af06a45def2"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.897"},"lastupdatetimestamp":1555635338339,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::22cf08edbe21df883cef311dbb1b2113","subRelType":"outcome","target":"50|dedup_wf_001::0f494c6243fbd3750333d11478afd02f"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.897"},"lastupdatetimestamp":1590395829588,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::22cf08edbe21df883cef311dbb1b2113","subRelType":"outcome","target":"50|dedup_wf_001::0f494c6243fbd3750333d11478afd02f"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.897"},"lastupdatetimestamp":1590395829588,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::22cf08edbe21df883cef311dbb1b2113","subRelType":"outcome","target":"50|dedup_wf_001::0f494c6243fbd3750333d11478afd02f"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:project:semrel","classname":"Propagation of result to project through semantic relation","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"trust":"0.85"},"lastupdatetimestamp":null,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::24f2aa53f1ee86a02418c0e3ce9b60a0","subRelType":"outcome","target":"50|dedup_wf_001::9262b23abe0c7b1fcf31828607e640a4"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:project:semrel","classname":"Propagation of result to project through semantic relation","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"trust":"0.85"},"lastupdatetimestamp":null,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2696de7f52345c24f5ae3ea1edaee035","subRelType":"outcome","target":"50|dedup_wf_001::d98e86b111c906eaa0f7ce6f4dc05142"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282776508,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::274facc0c23313831099146a2fa523c8","subRelType":"outcome","target":"50|dedup_wf_001::27b6abc6bb2c95d16e38643352f1ffd7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282736014,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::274facc0c23313831099146a2fa523c8","subRelType":"outcome","target":"50|dedup_wf_001::27b6abc6bb2c95d16e38643352f1ffd7"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7777"},"lastupdatetimestamp":1555635339676,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::27a79eaf8d74a68777299f3a514ea51f","subRelType":"outcome","target":"50|dedup_wf_001::8b1c19559479f88648f9866f092ac005"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7777"},"lastupdatetimestamp":1590395832133,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::27a79eaf8d74a68777299f3a514ea51f","subRelType":"outcome","target":"50|dedup_wf_001::8b1c19559479f88648f9866f092ac005"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.7777"},"lastupdatetimestamp":1590395832133,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::27a79eaf8d74a68777299f3a514ea51f","subRelType":"outcome","target":"50|dedup_wf_001::8b1c19559479f88648f9866f092ac005"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591283331471,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::295cf2292ad0403164e9c1c40068dfe0","subRelType":"outcome","target":"50|dedup_wf_001::6b00308ad34874602c25d1f738da8984"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591282896572,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2a1fa9eaa6b51a2c73404fbfaf7c80c2","subRelType":"outcome","target":"50|dedup_wf_001::ac76c06a5e506e88f09fd9cf70d4263d"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.5462"},"lastupdatetimestamp":1590395831038,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2dcb61583a11cb8bc7919bac2070bbdc","subRelType":"outcome","target":"50|dedup_wf_001::25e95361edaf3d662cd1a433ec86466a"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.5462"},"lastupdatetimestamp":1590395831038,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2dcb61583a11cb8bc7919bac2070bbdc","subRelType":"outcome","target":"50|dedup_wf_001::25e95361edaf3d662cd1a433ec86466a"} 
-{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.897"},"lastupdatetimestamp":1590395830155,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2ec0a5064d5161bdfae1c646187be35a","subRelType":"outcome","target":"50|od________18::b4454f3ebe1435875c3422db880dff72"} -{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1591283190345,"properties":[],"relClass":"produces","relType":"resultProject","source":"40|corda_______::2fb5fa7b3fb0df14bf18d6a7cc5b24ea","subRelType":"outcome","target":"50|dedup_wf_001::dd8e2a7614c73137bf9f414d3992b0d9"} \ No newline at end of file +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831891,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::0f7d119de1f656b5763a16acf876fed6","subRelType":"outcome","source":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395839728,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395836359,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7713935a3e4c65f30fcc6b064f212051","subRelType":"outcome","source":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395834612,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::7a1099637c8ec85097e185a00bd4f877","subRelType":"outcome","source":"50|dedup_wf_001::400872751d497a6ff9c7e7aba67d327e"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828225,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::87bc20a437817d73c2d64555d225e85b","subRelType":"outcome","source":"50|dedup_wf_001::bc92b920df0f280041a512a7912130cf"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395829268,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::8fb7796d545978ab9e38cf5cc38e4bb7","subRelType":"outcome","source":"50|dedup_wf_001::6681aacc33bd9eafd8993a68929e36a6"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395829268,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::8fb7796d545978ab9e38cf5cc38e4bb7","subRelType":"outcome","source":"50|dedup_wf_001::6681aacc33bd9eafd8993a68929e36a6"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831579,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::b432ea401d98a6e72c3dd17c316a3a0a","subRelType":"outcome","source":"50|dedup_wf_001::86228f7b229d657dff902b4f08154161"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395831579,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::b432ea401d98a6e72c3dd17c316a3a0a","subRelType":"outcome","source":"50|dedup_wf_001::86228f7b229d657dff902b4f08154161"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828923,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::dbd4dcfb0bf316a796a5512f7aeeccf5","subRelType":"outcome","source":"50|dedup_wf_001::03b35e23ff5f1dbc5f95db4505003244"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.6573"},"lastupdatetimestamp":1590395828923,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|aka_________::dbd4dcfb0bf316a796a5512f7aeeccf5","subRelType":"outcome","source":"50|dedup_wf_001::03b35e23ff5f1dbc5f95db4505003244"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395830521,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|anr_________::01703d216eb2d01f73e9f476302cd341","subRelType":"outcome","source":"50|dedup_wf_001::d4b9ca4bf332403c15abb47d21963ddc"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395830521,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|anr_________::01703d216eb2d01f73e9f476302cd341","subRelType":"outcome","source":"50|dedup_wf_001::d4b9ca4bf332403c15abb47d21963ddc"} 
+{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395828850,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|anr_________::0229a330cb3e72e8f976a93bef7e0982","subRelType":"outcome","source":"50|od________18::0fa23557cd83522c7db8ccfefad1f12e"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"propagation","inferred":true,"invisible":false,"provenanceaction":{"classid":"result:project:semrel","classname":"Propagation of result to project through semantic relation","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"trust":"0.85"},"lastupdatetimestamp":null,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|anr_________::7f0f296b5e4ec1a1d61cfe4da63406c7","subRelType":"outcome","source":"50|dedup_wf_001::595ab3bf9261fbd616fb0240fb78add2"} +{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_referencedProjects","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.72"},"lastupdatetimestamp":1590395837446,"properties":[],"relClass":"isProducedBy","relType":"resultProject","target":"40|anr_________::a7322657c4ea4c9a3dc7cb77deb51620","subRelType":"outcome","source":"50|od_______166::953fd0631804ebda31b415fea855d570"} \ No newline at end of file From b2c455f8837ef82e39cf2af8444b02ed61bccc0f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 14:08:09 +0100 Subject: [PATCH 066/108] added java doc --- .../oa/graph/dump/funderresults/SparkDumpFunderResults.java | 5 ++--- .../graph/dump/funderresults/SparkResultLinkedToProject.java | 5 +++++ 2 files changed, 7 insertions(+), 3 
deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index e2d8d7cb1..7049bdacf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -25,9 +25,8 @@ import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; /** - * Preparation of the Project information to be added to the dumped results. For each result associated to at least one - * Project, a serialization of an instance af ResultProject closs is done. ResultProject contains the resultId, and the - * list of Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to + * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC + * for the EC it specifies also the fundingStream (FP7 or H2020) */ public class SparkDumpFunderResults implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 8c91fbf1f..4c72cd526 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -24,6 +24,11 @@ import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; +/** + * Selects the 
results linked to projects. Only for these results the dump will be performed. + * The code to perform the dump and to expend the dumped results with the informaiton related to projects + * is the one used for the dump of the community products + */ public class SparkResultLinkedToProject implements Serializable { private static final Logger log = LoggerFactory.getLogger(SparkResultLinkedToProject.class); From 90d4369fd207663e6baadeb2e0b3a4858910387e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 14:34:58 +0100 Subject: [PATCH 067/108] added test to verify the compression in writing community info on hdfs --- .../graph/dump/complete/CreateEntityTest.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 702811549..4a3c6ed20 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -20,10 +20,7 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -146,6 +143,7 @@ public class CreateEntityTest { } @Test + @Disabled public void test2() throws IOException, ISLookUpException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); @@ -166,8 +164,16 @@ 
public class CreateEntityTest { final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); - List riList = new ArrayList<>(); - cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo))); + //List riList = new ArrayList<>(); + cInfoList.forEach(cInfo -> { + try { + writer.write(new Gson().toJson(Process.getEntity(cInfo))); + } catch (IOException e) { + e.printStackTrace(); + } + }); + + writer.close(); } } From 66c0e3e574d79da701b9cdf45f9d2c0616113675 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 17:52:17 +0100 Subject: [PATCH 068/108] changed because of https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2489 --- .../src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 89fa09f3c..76017d5b7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -90,9 +90,6 @@ public class MakeTarArchive implements Serializable { String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { String name = p_string.substring(p_string.lastIndexOf("/") + 1); -// if (name.trim().equalsIgnoreCase("communities_infrastructures")) { -// name = "communities_infrastructures.json"; -// } TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name); entry.setSize(fileStatus.getLen()); current_size += fileStatus.getLen(); From 1df94b85b4f037b400498f1b0e462c2ccc9f03d5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 17:57:43 +0100 Subject: [PATCH 069/108] changed because of https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2491 --- .../eu/dnetlib/dhp/oa/graph/dump/Constants.java | 1 - .../funderresults/SparkDumpFunderResults.java | 16 
+++++++++------- .../SparkResultLinkedToProject.java | 9 ++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java index c64554555..eb01ebf32 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java @@ -26,7 +26,6 @@ public class Constants { public static String ORCID = "orcid"; - public static String RESULT_PROJECT_IS_PRODUCED_BY = "isProducedBy"; static { accessRightsCoarMap.put("OPEN", "c_abf2"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 7049bdacf..826b4be8c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -6,7 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; -import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.api.zenodo.Community; +import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; 
@@ -72,7 +73,9 @@ public class SparkDumpFunderResults implements Serializable { Dataset relation = Utils .readPath(spark, relationPath + "/relation", Relation.class) - .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + Constants.RESULT_PROJECT_IS_PRODUCED_BY.toLowerCase()+ "'"); + .filter( + "dataInfo.deletedbyinference = false and lower(relClass) = '" + + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); Dataset result = Utils .readPath(spark, inputPath + "/publication", CommunityResult.class) @@ -86,18 +89,17 @@ public class SparkDumpFunderResults implements Serializable { .distinct() .collectAsList(); - funderList.forEach(funder -> { String fundernsp = funder.substring(3); String funderdump; - if (fundernsp.startsWith("corda")){ + if (fundernsp.startsWith("corda")) { funderdump = "EC_"; - if(fundernsp.endsWith("h2020")){ + if (fundernsp.endsWith("h2020")) { funderdump += "H2020"; - }else{ + } else { funderdump += "FP7"; } - }else{ + } else { funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase(); } writeFunderResult(funder, result, outputPath + "/" + funderdump); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 4c72cd526..07e289cbd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -6,7 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; -import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import 
org.apache.spark.api.java.function.MapFunction; @@ -19,6 +19,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; @@ -74,14 +75,16 @@ public class SparkResultLinkedToProject implements Serializable { } private static void writeResultsLinkedToProjects(SparkSession spark, Class inputClazz, - String inputPath, String outputPath, String relationPath) { + String inputPath, String outputPath, String relationPath) { Dataset results = Utils .readPath(spark, inputPath, inputClazz) .filter("dataInfo.deletedbyinference = false and datainfo.invisible = false"); Dataset relations = Utils .readPath(spark, relationPath, Relation.class) - .filter("dataInfo.deletedbyinference = false and lower(relClass) = '" + Constants.RESULT_PROJECT_IS_PRODUCED_BY.toLowerCase() + "'"); + .filter( + "dataInfo.deletedbyinference = false and lower(relClass) = '" + + ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'"); relations .joinWith( From f5e5e92a101feca613bc5a1232e43eb17d3edca2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 17:58:53 +0100 Subject: [PATCH 070/108] changed because of https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2493 --- .../oa/graph/dump/funderresults/oozie_app/workflow.xml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml index d310390b7..650b972fa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funderresults/oozie_app/workflow.xml @@ -121,7 +121,7 @@ - + @@ -274,7 +274,6 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${workingDir}/dump/publication --communityMapPath${workingDir}/communityMap @@ -302,7 +301,6 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${workingDir}/dump/dataset --communityMapPath${workingDir}/communityMap @@ -330,7 +328,6 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath${workingDir}/dump/otherresearchproduct --communityMapPath${workingDir}/communityMap @@ -358,7 +355,6 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --sourcePath${workingDir}/result/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${workingDir}/dump/software --communityMapPath${workingDir}/communityMap @@ -532,8 +528,7 @@ eu.dnetlib.dhp.oa.graph.dump.MakeTar --hdfsPath${outputPath} --nameNode${nameNode} - - --sourcePath/user/miriam.baglioni/graph_dump_whole_production_funder_results + --sourcePath${workingDir}/resultperfunder From d4ddde2ef2d5b6d8d2d50b36882a8e6e0b005cae Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 18:01:01 +0100 Subject: [PATCH 071/108] changed because of https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2503 --- .../eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 4a3c6ed20..f179e9de5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -164,7 +164,6 @@ public class CreateEntityTest { final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); - //List riList = new ArrayList<>(); cInfoList.forEach(cInfo -> { try { writer.write(new Gson().toJson(Process.getEntity(cInfo))); From ed01e5a5e1feef7ba37ee7568290b75c31e74ede Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 18:09:34 +0100 Subject: [PATCH 072/108] https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2507 --- .../dnetlib/dhp/oa/graph/dump/input_parameters.json | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json index ca12ae3ec..1d986fc26 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json @@ -29,12 +29,13 @@ "paramLongName":"resultTableName", "paramDescription": "the name of the result table we are currently working on", "paramRequired": true - },{ - "paramName":"dt", - "paramLongName":"dumpType", - "paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project", - "paramRequired": false -} + }, + { + "paramName":"dt", + "paramLongName":"dumpType", + "paramDescription": "the type of the 
dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project", + "paramRequired": false + } ] From 5fbe54ef54b68e4ec972666ea112a6c81494a1c6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 18:10:28 +0100 Subject: [PATCH 073/108] https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2509 --- .../oa/graph/dump/input_parameters_link_prj.json | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json index 5c4886741..fdd9492fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json @@ -22,12 +22,13 @@ "paramLongName":"resultTableName", "paramDescription": "the name of the result table we are currently working on", "paramRequired": true - }, { - "paramName":"rp", - "paramLongName":"relationPath", - "paramDescription": "the path to the relations", - "paramRequired": true -} + }, + { + "paramName":"rp", + "paramLongName":"relationPath", + "paramDescription": "the path to the relations", + "paramRequired": true + } ] From 1a89f8211c6ec589305a77188a84b54268ff30d6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 18:12:40 +0100 Subject: [PATCH 074/108] https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/61#issuecomment-2505 --- .../dhp/oa/graph/dump/complete/CreateEntityTest.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index f179e9de5..20c578738 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -164,14 +164,9 @@ public class CreateEntityTest { final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); - cInfoList.forEach(cInfo -> { - try { - writer.write(new Gson().toJson(Process.getEntity(cInfo))); - } catch (IOException e) { - e.printStackTrace(); - } - }); - + for(ContextInfo cInfo: cInfoList){ + writer.write(new Gson().toJson(Process.getEntity(cInfo))); + } writer.close(); } From 124591a7f32d58b70166b22a6d1ad297c67d6c36 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Nov 2020 18:23:28 +0100 Subject: [PATCH 075/108] refactoring --- .../dnetlib/dhp/oa/graph/dump/Constants.java | 1 - .../funderresults/SparkDumpFunderResults.java | 2 +- .../SparkResultLinkedToProject.java | 2 +- .../graph/dump/complete/CreateEntityTest.java | 2 +- .../ResultLinkedToProjectTest.java | 172 +++++++-------- .../dump/funderresult/SplitPerFunderTest.java | 195 +++++++++--------- 6 files changed, 188 insertions(+), 186 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java index eb01ebf32..86a275ae2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java @@ -26,7 +26,6 @@ public class Constants { public static String ORCID = "orcid"; - static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 826b4be8c..00f604b14 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -20,6 +19,7 @@ import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 07e289cbd..1a28a21f4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.Optional; -import 
eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 20c578738..3ecbd1894 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -164,7 +164,7 @@ public class CreateEntityTest { final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); - for(ContextInfo cInfo: cInfoList){ + for (ContextInfo cInfo : cInfoList) { writer.write(new Gson().toJson(Process.getEntity(cInfo))); } writer.close(); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index b7ee5b122..dbe3db6fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -1,9 +1,11 @@ + package eu.dnetlib.dhp.oa.graph.dump.funderresult; -import com.fasterxml.jackson.databind.ObjectMapper; -import 
eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Result; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -17,117 +19,119 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; public class ResultLinkedToProjectTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static final Logger log = LoggerFactory - .getLogger(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class); + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class); - private static HashMap map = new HashMap<>(); + private static HashMap map = new HashMap<>(); - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory( + eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()); + 
log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); - conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(eu.dnetlib.dhp.oa.graph.dump.funderresult.ResultLinkedToProjectTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - public void testNoMatch() throws Exception { + @Test + public void testNoMatch() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") - .getPath(); + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") + .getPath(); - final String relationPath = 
getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json") - .getPath(); + final String relationPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/relations.json") + .getPath(); - SparkResultLinkedToProject.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/preparedInfo", - "-sourcePath", sourcePath, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-relationPath", relationPath + SparkResultLinkedToProject.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/preparedInfo", + "-sourcePath", sourcePath, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-relationPath", relationPath - }); + }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/preparedInfo") - .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedInfo") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Result.class)); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Result.class)); - Assertions.assertEquals(0, verificationDataset.count()); + Assertions.assertEquals(0, verificationDataset.count()); - } + } - @Test - public void testMatchOne() throws Exception { + @Test + public void testMatchOne() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") - .getPath(); + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") 
+ .getPath(); - final String relationPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json") - .getPath(); + final String relationPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/relations.json") + .getPath(); - SparkResultLinkedToProject.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/preparedInfo", - "-sourcePath", sourcePath, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-relationPath", relationPath + SparkResultLinkedToProject.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/preparedInfo", + "-sourcePath", sourcePath, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-relationPath", relationPath - }); + }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/preparedInfo") - .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/preparedInfo") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Publication.class)); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Publication.class)); - Assertions.assertEquals(1, verificationDataset.count()); + Assertions.assertEquals(1, verificationDataset.count()); - } + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 3381dabe1..71bf5d942 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -1,11 +1,10 @@ + package eu.dnetlib.dhp.oa.graph.dump.funderresult; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; -import eu.dnetlib.dhp.oa.graph.dump.SplitForCommunityTest; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit; -import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults; -import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -19,124 +18,124 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; +import eu.dnetlib.dhp.oa.graph.dump.SplitForCommunityTest; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit; +import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults; +import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; public class SplitPerFunderTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static SparkSession spark; + private static SparkSession spark; - private static Path workingDir; + private static Path workingDir; - private static final Logger log = LoggerFactory.getLogger(SplitPerFunderTest.class); + private static final Logger log = LoggerFactory.getLogger(SplitPerFunderTest.class); - @BeforeAll - public static void beforeAll() throws 
IOException { - workingDir = Files.createTempDirectory(SplitPerFunderTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files.createTempDirectory(SplitPerFunderTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); - SparkConf conf = new SparkConf(); - conf.setAppName(SplitPerFunderTest.class.getSimpleName()); + SparkConf conf = new SparkConf(); + conf.setAppName(SplitPerFunderTest.class.getSimpleName()); - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - spark = SparkSession - .builder() - .appName(SplitPerFunderTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } + spark = SparkSession + .builder() + .appName(SplitPerFunderTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } - @Test - public void test1() throws Exception { + @Test + public void test1() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") + .getPath(); - final String sourcePath = getClass() - 
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") - .getPath(); + SparkDumpFunderResults.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-outputPath", workingDir.toString() + "/split", + "-sourcePath", sourcePath, + "-relationPath", sourcePath + }); - SparkDumpFunderResults.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/split", - "-sourcePath", sourcePath, - "-relationPath", sourcePath + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - }); + // FP7 3 + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/split/EC_FP7") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + Assertions.assertEquals(3, verificationDataset.count()); - // FP7 3 - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/split/EC_FP7") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions + .assertEquals( + 1, verificationDataset.filter("id = '50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776'").count()); - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + // CIHR 2 + tmp = sc + .textFile(workingDir.toString() + "/split/CIHR") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(2, tmp.count()); - Assertions.assertEquals(3, verificationDataset.count()); + // NWO 1 + tmp = sc + .textFile(workingDir.toString() + "/split/NWO") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); - Assertions - .assertEquals( - 1, verificationDataset.filter("id = 
'50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776'").count()); + // NIH 3 + tmp = sc + .textFile(workingDir.toString() + "/split/NIH") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(2, tmp.count()); + // NSF 1 + tmp = sc + .textFile(workingDir.toString() + "/split/NSF") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); - // CIHR 2 - tmp = sc - .textFile(workingDir.toString() + "/split/CIHR") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(2, tmp.count()); + // SNSF 1 + tmp = sc + .textFile(workingDir.toString() + "/split/SNSF") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); - //NWO 1 - tmp = sc - .textFile(workingDir.toString() + "/split/NWO") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(1, tmp.count()); + // NHMRC 1 + tmp = sc + .textFile(workingDir.toString() + "/split/NHMRC") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(1, tmp.count()); - //NIH 3 - tmp = sc - .textFile(workingDir.toString() + "/split/NIH") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(2, tmp.count()); + // H2020 3 + tmp = sc + .textFile(workingDir.toString() + "/split/EC_H2020") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + Assertions.assertEquals(3, tmp.count()); - //NSF 1 - tmp = sc - .textFile(workingDir.toString() + "/split/NSF") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(1, tmp.count()); - - //SNSF 1 - tmp = sc - .textFile(workingDir.toString() + "/split/SNSF") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(1, tmp.count()); - - //NHMRC 1 - tmp = sc - .textFile(workingDir.toString() + 
"/split/NHMRC") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(1, tmp.count()); - - //H2020 3 - tmp = sc - .textFile(workingDir.toString() + "/split/EC_H2020") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - Assertions.assertEquals(3, tmp.count()); - - } + } } From 815d6b25d945f9de462045c16d97631a97ab2acc Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 30 Nov 2020 00:48:10 +0200 Subject: [PATCH 076/108] added last step to update cache --- .../dhp/oa/graph/stats/oozie_app/config-default.xml | 4 ++++ .../dhp/oa/graph/stats/oozie_app/updateCache.sh | 4 ++++ .../dhp/oa/graph/stats/oozie_app/workflow.xml | 12 ++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml index 2cd53a37b..9331d4ac5 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml @@ -27,4 +27,8 @@ oozie.wf.workflow.notification.url {serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status + + stats_tool_api_url + ${stats_tool_api_url} + \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh new file mode 100644 index 000000000..36e74a556 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/updateCache.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + 
+curl --request GET $1/cache/updateCache + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index d6cc14e25..0b6a00df1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -287,6 +287,18 @@ ${wf:appPath()}/scripts/step19.sql impala-shell.sh + + + + + + + ${jobTracker} + ${nameNode} + updateCache.sh + ${stats_tool_api_url} + updateCache.sh + From 40c4559e923c5e2f0124477dc8deb8bfe2ef22d5 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 30 Nov 2020 14:19:22 +0100 Subject: [PATCH 077/108] added datainfo on authors pid with "sysimport:crosswalk:entityregistry", --- .../doiboost/orcidnodoi/oaf/PublicationToOaf.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index ece59c3f1..18fecc6c2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -144,7 +144,7 @@ public class PublicationToOaf implements Serializable { publication.setLastupdatetimestamp(new Date().getTime()); - publication.setDateofcollection("2019-10-22"); + publication.setDateofcollection("2020-10-14"); publication.setDateoftransformation(DumpToActionsUtility.now_ISO8601()); // Adding external ids @@ -526,6 +526,18 @@ public class PublicationToOaf implements Serializable { q.setSchemeid(ModelConstants.DNET_PID_TYPES); q.setSchemename(ModelConstants.DNET_PID_TYPES); 
sp.setQualifier(q); + final DataInfo dataInfo = new DataInfo(); + dataInfo.setDeletedbyinference(false); + dataInfo.setInferred(false); + dataInfo.setTrust("0.9"); + dataInfo + .setProvenanceaction( + mapQualifier( + "sysimport:crosswalk:entityregistry", + "Harvested", + "dnet:provenanceActions", + "dnet:provenanceActions")); + sp.setDataInfo(dataInfo); return sp; } } From f8468c9c2296905c0f5102df78a797f4ab0ccf7c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 1 Dec 2020 20:09:35 +0100 Subject: [PATCH 078/108] added extention for new author pid (orcid_pending) --- .../src/main/java/eu/dnetlib/dhp/PropagationConstant.java | 5 ++++- .../PrepareResultOrcidAssociationStep1.java | 3 ++- .../SparkOrcidToResultFromSemRelJob.java | 5 +++-- .../dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 5 ++++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 1cc41c395..d38d79fec 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,7 +44,10 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String PROPAGATION_AUTHOR_PID = "ORCID"; + public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending"; + public static final String ORCID = "orcid"; + public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 869831ba2..1e445828c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -102,7 +102,8 @@ public class PrepareResultOrcidAssociationStep1 { + " FROM result " + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp " + + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or " + +" lower(MyP.qalifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 3fc127064..3e5c4d641 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -176,7 +176,7 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); + p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME)); p .setDataInfo( getDataInfo( @@ -201,7 +201,8 @@ public class SparkOrcidToResultFromSemRelJob { return false; } for 
(StructuredProperty pid : pids.get()) { - if (PROPAGATION_AUTHOR_PID.equals(pid.getQualifier().getClassid())) { + if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) || + ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index edd2e7ba7..aeaa8a3c1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; +import eu.dnetlib.dhp.PropagationConstant; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -166,7 +168,8 @@ public class OrcidPropagationJobTest { propagatedAuthors .filter( "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " - + "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'") + + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + + PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'") .count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); From 57f448b7a423030f7d745e80a6fc7100ed480e57 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 10:44:05 +0100 Subject: [PATCH 079/108] graph cleaning workflow separate orcid_pending from orcid, depending on the author pid provenance --- .../dhp/schema/common/ModelConstants.java | 3 +++ .../dhp/oa/graph/clean/CleaningFunctions.java | 25 ++++++++++++++++++- 
.../eu/dnetlib/dhp/oa/graph/clean/result.json | 22 ++++++++++++++++ .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 1 + 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index d759f0d55..0b4d29c8e 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier; public class ModelConstants { + public static final String ORCID = "orcid"; + public static final String ORCID_PENDING = "orcid_pending"; + public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource"; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 5155d0242..945f717bb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -189,6 +189,14 @@ public class CleaningFunctions { author.setRank(i++); } } + + final Set collectedFrom = Optional + .ofNullable(r.getCollectedfrom()) + .map(c -> c.stream() + .map(KeyValue::getKey) + .collect(Collectors.toCollection(HashSet::new))) + .orElse(new HashSet<>()); + for (Author a : r.getAuthor()) { if (Objects.isNull(a.getPid())) { a.setPid(Lists.newArrayList()); @@ -201,13 +209,28 @@ public class CleaningFunctions { .filter(p -> Objects.nonNull(p.getQualifier())) .filter(p -> StringUtils.isNotBlank(p.getValue())) .map(p -> { + // hack to 
distinguish orcid from orcid_pending + String pidProvenance = Optional + .ofNullable(p.getDataInfo()) + .map(d -> Optional + .ofNullable(d.getProvenanceaction()) + .map(Qualifier::getClassid) + .orElse("")) + .orElse(""); + if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { + p.getQualifier().setClassid(ModelConstants.ORCID); + } else { + p.getQualifier().setClassid(ModelConstants.ORCID_PENDING); + } p.setValue(p.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, "")); return p; }) .collect( Collectors .toMap( - StructuredProperty::getValue, Function.identity(), (p1, p2) -> p1, + p -> p.getQualifier().getClassid() + p.getValue(), + Function.identity(), + (p1, p2) -> p1, LinkedHashMap::new)) .values() .stream() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json index 5c903cd0e..e746d236e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/result.json @@ -49,6 +49,28 @@ "schemename": "dnet:pid_types" }, "value": "https://orcid.org/0000-0001-9613-6639" + }, + { + "dataInfo": { + "deletedbyinference": false, + "inferenceprovenance": "", + "inferred": false, + "invisible": false, + "provenanceaction": { + "classid": "sysimport:crosswalk:entityregistry", + "classname": "sysimport:crosswalk:entityregistry", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + }, + "trust": "0.9" + }, + "qualifier": { + "classid": "orcid", + "classname": "ORCID12", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "value": "0000-0001-9613-6639" } ], "rank": 1, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 93cc00eca..67c070d1d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1031,6 +1031,7 @@ dnet:pid_types @=@ dnet:pid_types @=@ jprn @=@ JPRN Identifier dnet:pid_types @=@ dnet:pid_types @=@ mag_id @=@ Microsoft Academic Graph Identifier dnet:pid_types @=@ dnet:pid_types @=@ oai @=@ Open Archives Initiative dnet:pid_types @=@ dnet:pid_types @=@ orcid @=@ Open Researcher and Contributor ID +dnet:pid_types @=@ dnet:pid_types @=@ orcid_pending @=@ Open Researcher and Contributor ID dnet:pid_types @=@ dnet:pid_types @=@ PANGAEA @=@ PANGAEA dnet:pid_types @=@ dnet:pid_types @=@ epo_nr_epodoc @=@ Patent application number in EPODOC format dnet:pid_types @=@ dnet:pid_types @=@ UNKNOWN @=@ UNKNOWN From 51c582c08ccceaf428aa5ff4998afa4a2cead26a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:12:54 +0100 Subject: [PATCH 080/108] added orcid class name among the constants set --- .../main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index 0b4d29c8e..1efa86586 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -9,6 +9,7 @@ public class ModelConstants { public static final String ORCID = "orcid"; public static final String ORCID_PENDING = "orcid_pending"; + public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; public static final String DNET_RESULT_TYPOLOGIES = 
"dnet:result_typologies"; From cd285e98bc255e153dac982665e08698af39f10f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:13:23 +0100 Subject: [PATCH 081/108] usoing the constants defined in the ModelConstants class --- .../src/main/java/eu/dnetlib/dhp/PropagationConstant.java | 4 +--- .../PrepareResultOrcidAssociationStep1.java | 5 +++-- .../SparkOrcidToResultFromSemRelJob.java | 7 ++++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index d38d79fec..360cf5ffa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,9 +44,7 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - public static final String PROPAGATION_AUTHOR_PID_CLASSID = "orcid_pending"; - public static final String ORCID = "orcid"; - public static final String PROPAGATION_AUTHOR_PID_CLASSNAME = "Open Researcher and Contributor ID"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 1e445828c..99ae1ee2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -102,8 +103,8 @@ public class PrepareResultOrcidAssociationStep1 { + " FROM result " + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " - + " WHERE lower(MyP.qualifier.classid) = '" + ORCID + "' or " - +" lower(MyP.qalifier.classid) = '" + PROPAGATION_AUTHOR_PID_CLASSID + "') tmp " + + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " + +" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 3e5c4d641..55f18007d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -176,7 +177,7 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); 
p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID_CLASSID, PROPAGATION_AUTHOR_PID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); p .setDataInfo( getDataInfo( @@ -201,8 +202,8 @@ public class SparkOrcidToResultFromSemRelJob { return false; } for (StructuredProperty pid : pids.get()) { - if (PROPAGATION_AUTHOR_PID_CLASSID.equals(pid.getQualifier().getClassid().toLowerCase()) || - ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { + if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || + ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { return true; } } From d5efa6963a1fd8af983fe5fa71ffc4cd7d18d8ac Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 2 Dec 2020 11:20:26 +0100 Subject: [PATCH 082/108] using constants in ModelCOnstants --- .../dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index aeaa8a3c1..4989e09d1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -7,6 +7,7 @@ import java.nio.file.Path; import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -169,7 +170,7 @@ public class OrcidPropagationJobTest { .filter( "id = 
'50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + - PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'") + ModelConstants.ORCID_PENDING + "'") .count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); From 74242e450e6d288db3275ce8709e4ae0f2815051 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 11:23:35 +0100 Subject: [PATCH 083/108] using constants from ModelConstants --- .../orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index aeaa8a3c1..ba50f9bc5 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -5,8 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; -import eu.dnetlib.dhp.PropagationConstant; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -21,8 +19,11 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.cloudera.org.codehaus.jackson.map.jsontype.impl.ClassNameIdResolver; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.PropagationConstant; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Dataset; public class OrcidPropagationJobTest { @@ -169,7 +170,7 @@ public class OrcidPropagationJobTest { .filter( "id = 
'50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " + "and name = 'Vajinder' and surname = 'Kumar' and pidType = '" + - PropagationConstant.PROPAGATION_AUTHOR_PID_CLASSID +"'") + ModelConstants.ORCID_PENDING + "'") .count()); Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count()); From cfb55effd9f26d9143a0ccaea01906bd2b87c7ab Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 2 Dec 2020 11:23:49 +0100 Subject: [PATCH 084/108] code formatting --- .../orcidnodoi/oaf/PublicationToOaf.java | 12 ++++---- .../eu/dnetlib/dhp/PropagationConstant.java | 3 -- .../PrepareResultOrcidAssociationStep1.java | 4 +-- .../SparkOrcidToResultFromSemRelJob.java | 4 +-- .../dhp/oa/graph/clean/CleaningFunctions.java | 29 ++++++++++--------- .../oa/provision/XmlRecordFactoryTest.java | 24 +++++++-------- 6 files changed, 36 insertions(+), 40 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 18fecc6c2..1aed66dfd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -531,12 +531,12 @@ public class PublicationToOaf implements Serializable { dataInfo.setInferred(false); dataInfo.setTrust("0.9"); dataInfo - .setProvenanceaction( - mapQualifier( - "sysimport:crosswalk:entityregistry", - "Harvested", - "dnet:provenanceActions", - "dnet:provenanceActions")); + .setProvenanceaction( + mapQualifier( + "sysimport:crosswalk:entityregistry", + "Harvested", + "dnet:provenanceActions", + "dnet:provenanceActions")); sp.setDataInfo(dataInfo); return sp; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 360cf5ffa..692605b03 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -44,9 +44,6 @@ public class PropagationConstant { public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID = "authorpid:result"; public static final String PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME = "Propagation of authors pid to result through semantic relations"; - - - public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String cfHbforResultQuery = "select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 99ae1ee2d..dd8342980 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; @@ -104,7 +104,7 @@ public class 
PrepareResultOrcidAssociationStep1 { + " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or " - +" lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + + " lower(MyP.qalifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp " + " GROUP BY id) r_t " + " JOIN (" + " SELECT source, target " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 55f18007d..9f08fe580 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.List; import java.util.Optional; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; @@ -24,6 +23,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.PacePerson; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; @@ -203,7 +203,7 @@ public class SparkOrcidToResultFromSemRelJob { } for (StructuredProperty pid : pids.get()) { if (ModelConstants.ORCID_PENDING.equals(pid.getQualifier().getClassid().toLowerCase()) || - ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { + ModelConstants.ORCID.equals(pid.getQualifier().getClassid().toLowerCase())) { 
return true; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 945f717bb..2a6fd3a1d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -191,11 +191,13 @@ public class CleaningFunctions { } final Set collectedFrom = Optional - .ofNullable(r.getCollectedfrom()) - .map(c -> c.stream() - .map(KeyValue::getKey) - .collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet<>()); + .ofNullable(r.getCollectedfrom()) + .map( + c -> c + .stream() + .map(KeyValue::getKey) + .collect(Collectors.toCollection(HashSet::new))) + .orElse(new HashSet<>()); for (Author a : r.getAuthor()) { if (Objects.isNull(a.getPid())) { @@ -211,12 +213,13 @@ public class CleaningFunctions { .map(p -> { // hack to distinguish orcid from orcid_pending String pidProvenance = Optional - .ofNullable(p.getDataInfo()) - .map(d -> Optional - .ofNullable(d.getProvenanceaction()) - .map(Qualifier::getClassid) - .orElse("")) - .orElse(""); + .ofNullable(p.getDataInfo()) + .map( + d -> Optional + .ofNullable(d.getProvenanceaction()) + .map(Qualifier::getClassid) + .orElse("")) + .orElse(""); if (pidProvenance.equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) { p.getQualifier().setClassid(ModelConstants.ORCID); } else { @@ -229,8 +232,8 @@ public class CleaningFunctions { Collectors .toMap( p -> p.getQualifier().getClassid() + p.getValue(), - Function.identity(), - (p1, p2) -> p1, + Function.identity(), + (p1, p2) -> p1, LinkedHashMap::new)) .values() .stream() diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java 
b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 619df7716..e84f97836 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,11 +7,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -20,13 +15,18 @@ import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.mockito.Mock; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; -import org.mockito.Mock; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; //TODO to enable it we need to update the joined_entity.json test file //@Disabled @@ -44,34 +44,30 @@ public class XmlRecordFactoryTest { assertNotNull(je); Document doc = buildXml(je); - ////TODO specific test assertion on doc + //// TODO specific test assertion on doc } - - @Test void testBologna() throws IOException, DocumentException { final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json")); - Publication oaf = new ObjectMapper().readValue(json, Publication.class); + 
Publication oaf = new ObjectMapper().readValue(json, Publication.class); assertNotNull(oaf); JoinedEntity je = new JoinedEntity(); je.setEntity(oaf); assertNotNull(je); Document doc = buildXml(je); - //TODO specific test assertion on doc + // TODO specific test assertion on doc System.out.println(doc.asXML()); - - } private Document buildXml(JoinedEntity je) throws DocumentException { ContextMapper contextMapper = new ContextMapper(); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); String xml = xmlRecordFactory.build(je); From 6ba8037cc7926f0fd8b00e31ac6ce39ef18b1f7f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 2 Dec 2020 11:34:46 +0100 Subject: [PATCH 085/108] fixed failure to test due to changing of input --- .../java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index a3bb2a4f4..4568e23a5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -158,7 +158,7 @@ class CrossrefMappingTest { rels.foreach(s => logger.info(s.getTarget)) - assertEquals(rels.size, 3 ) + assertEquals(rels.size, 6 ) } From 7da679542fdc365a1296546b5afd69151b1347a5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 2 Dec 2020 14:28:09 +0100 Subject: [PATCH 086/108] fixed wrong projectId --- .../main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index d1f6d8613..1adb7465e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -294,7 +294,7 @@ case object Crossref2Oaf { } def getProjectId (nsPrefix:String, targetId:String):String = { - "40|$nsPrefix::$targetId" + s"40|$nsPrefix::$targetId" } From 0948536614d1c1632a93ca514fe5b2e0215ec622 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 2 Dec 2020 15:41:56 +0200 Subject: [PATCH 087/108] initial implementation of the promote wf --- dhp-workflows/dhp-stats-promote/pom.xml | 32 ++ .../graph/stats/oozie_app/config-default.xml | 34 ++ .../oa/graph/stats/oozie_app/impala-shell.sh | 18 ++ .../oa/graph/stats/oozie_app/promoteCache.sh | 4 + .../graph/stats/oozie_app/scripts/step1.sql | 8 + .../graph/stats/oozie_app/scripts/step10.sql | 21 ++ .../graph/stats/oozie_app/scripts/step11.sql | 44 +++ .../graph/stats/oozie_app/scripts/step12.sql | 38 +++ .../graph/stats/oozie_app/scripts/step13.sql | 59 ++++ .../graph/stats/oozie_app/scripts/step14.sql | 49 +++ .../graph/stats/oozie_app/scripts/step15.sql | 36 +++ .../graph/stats/oozie_app/scripts/step16.sql | 80 +++++ .../stats/oozie_app/scripts/step16_5.sql | 55 ++++ .../stats/oozie_app/scripts/step16_6.sql | 32 ++ .../graph/stats/oozie_app/scripts/step17.sql | 207 ++++++++++++ .../graph/stats/oozie_app/scripts/step18.sql | 8 + .../graph/stats/oozie_app/scripts/step19.sql | 8 + .../graph/stats/oozie_app/scripts/step2.sql | 44 +++ .../graph/stats/oozie_app/scripts/step3.sql | 36 +++ .../graph/stats/oozie_app/scripts/step4.sql | 36 +++ .../graph/stats/oozie_app/scripts/step5.sql | 36 +++ .../graph/stats/oozie_app/scripts/step6.sql | 30 ++ .../graph/stats/oozie_app/scripts/step7.sql | 31 ++ .../graph/stats/oozie_app/scripts/step8.sql | 58 ++++ .../graph/stats/oozie_app/scripts/step9.sql | 12 + 
.../dhp/oa/graph/stats/oozie_app/workflow.xml | 302 ++++++++++++++++++ 26 files changed, 1318 insertions(+) create mode 100644 dhp-workflows/dhp-stats-promote/pom.xml create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/promoteCache.sh create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql create mode 100644 
dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql create mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-stats-promote/pom.xml b/dhp-workflows/dhp-stats-promote/pom.xml new file mode 100644 index 000000000..c64c2f58e --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/pom.xml @@ -0,0 +1,32 @@ + + + + dhp-workflows + eu.dnetlib.dhp + 1.2.4-SNAPSHOT + + 4.0.0 + dhp-stats-promote + + + org.apache.spark + spark-core_2.11 + + + org.apache.spark + spark-sql_2.11 + + + + + + pl.project13.maven + git-commit-id-plugin + 2.1.11 + + false + + + + + diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml new file mode 100644 index 000000000..9331d4ac5 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml @@ -0,0 +1,34 @@ + + + jobTracker + ${jobTracker} + + + nameNode + ${nameNode} + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hive_jdbc_url + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + oozie.wf.workflow.notification.url + {serviceUrl}/v1/oozieNotification/jobUpdate?jobId=$jobId%26status=$status + + + stats_tool_api_url + ${stats_tool_api_url} + + \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh new file mode 100644 index 000000000..70112dc7b --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/impala-shell.sh @@ -0,0 +1,18 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! 
[ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +echo "Getting file from " $3 +hdfs dfs -copyToLocal $3 + +echo "Running impala shell make the new database visible" +impala-shell -q "INVALIDATE METADATA;" + +echo "Running impala shell to compute new table stats" +impala-shell -d $1 -f $2 +echo "Impala shell finished" +rm $2 diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/promoteCache.sh b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/promoteCache.sh new file mode 100644 index 000000000..2d28377fb --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/promoteCache.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +curl --request GET $1/cache/promoteCache + diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql new file mode 100644 index 000000000..9697a1dc8 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql @@ -0,0 +1,8 @@ +-------------------------------------------------------------- +-------------------------------------------------------------- +-- Stats database creation +-------------------------------------------------------------- +-------------------------------------------------------------- + +DROP database IF EXISTS ${stats_db_name} CASCADE; +CREATE database ${stats_db_name}; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql new file mode 100644 index 000000000..46ff295f4 --- /dev/null +++ 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -0,0 +1,21 @@ +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +-- Tables/views from external tables/views (Fundref, Country, CountyGDP, roarmap, rndexpediture) +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS SELECT * FROM ${external_stats_db_name}.fundref; +CREATE OR REPLACE VIEW ${stats_db_name}.country AS SELECT * FROM ${external_stats_db_name}.country; +CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS SELECT * FROM ${external_stats_db_name}.countrygdp; +CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS SELECT * FROM ${external_stats_db_name}.roarmap; +CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS SELECT * FROM ${external_stats_db_name}.rndexpediture; +CREATE OR REPLACE VIEW ${stats_db_name}.context AS SELECT * FROM ${external_stats_db_name}.context; +CREATE OR REPLACE VIEW ${stats_db_name}.category AS SELECT * FROM ${external_stats_db_name}.category; +CREATE OR REPLACE VIEW ${stats_db_name}.concept AS SELECT * FROM ${external_stats_db_name}.concept; + + +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +-- Creation date of the database +------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------ +create table ${stats_db_name}.creation_date as select date_format(current_date(), 'dd-MM-yyyy') as date; \ No newline at 
end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql new file mode 100644 index 000000000..13e141459 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql @@ -0,0 +1,44 @@ +---------------------------------------------------------------- +---------------------------------------------------------------- +-- Post processing - Updates on main tables +---------------------------------------------------------------- +---------------------------------------------------------------- + +--Datasource temporary table updates +UPDATE ${stats_db_name}.datasource_tmp SET harvested='true' WHERE datasource_tmp.id IN (SELECT DISTINCT d.id FROM ${stats_db_name}.datasource_tmp d, ${stats_db_name}.result_datasources rd WHERE d.id=rd.datasource); + +-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables +UPDATE ${stats_db_name}.project_tmp SET haspubs='yes' WHERE project_tmp.id IN (SELECT pr.id FROM ${stats_db_name}.project_results pr, ${stats_db_name}.result r WHERE pr.result=r.id AND r.type='publication'); + +DROP TABLE IF EXISTS ${stats_db_name}.project; +CREATE TABLE ${stats_db_name}.project stored as parquet as +SELECT p.id , p.acronym, p.title, p.funder, p.funding_lvl0, p.funding_lvl1, p.funding_lvl2, p.ec39, p.type, p.startdate, p.enddate, p.start_year, p.end_year, p.duration, +CASE WHEN prr1.id IS NULL THEN 'no' ELSE 'yes' END AS haspubs, +CASE WHEN prr1.id IS NULL THEN 0 ELSE prr1.np END AS numpubs, +CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub, +CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs, +p.callidentifier, p.code +FROM ${stats_db_name}.project_tmp p +LEFT JOIN (SELECT pr.id, 
count(distinct pr.result) AS np + FROM ${stats_db_name}.project_results pr INNER JOIN ${stats_db_name}.result r ON pr.result=r.id + WHERE r.type='publication' + GROUP BY pr.id) AS prr1 on prr1.id = p.id +LEFT JOIN (SELECT pp.id, max(datediff(to_date(r.date), to_date(pp.enddate)) ) AS daysForlastPub , count(distinct r.id) AS dp + FROM ${stats_db_name}.project_tmp pp, ${stats_db_name}.project_results pr, ${stats_db_name}.result r + WHERE pp.id=pr.id AND pr.result=r.id AND r.type='publication' AND datediff(to_date(r.date), to_date(pp.enddate)) > 0 + GROUP BY pp.id) AS prr2 + ON prr2.id = p.id; + +-- Publication temporary table updates +UPDATE ${stats_db_name}.publication_tmp SET delayed = 'yes' WHERE publication_tmp.id IN (SELECT distinct r.id FROM stats_wf_db_obs.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); + +-- Dataset temporary table updates +UPDATE ${stats_db_name}.dataset_tmp SET delayed = 'yes' WHERE dataset_tmp.id IN (SELECT distinct r.id FROM stats_wf_db_obs.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); + +-- Software temporary table updates +UPDATE ${stats_db_name}.software_tmp SET delayed = 'yes' WHERE software_tmp.id IN (SELECT distinct r.id FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); + +-- Oherresearchproduct temporary table updates +UPDATE ${stats_db_name}.otherresearchproduct_tmp SET delayed = 'yes' WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); + +CREATE OR REPLACE VIEW 
${stats_db_name}.project_results_publication AS SELECT result_projects.id AS result, result_projects.project AS project_results, result.date as resultdate, project.enddate as projectenddate, result_projects.daysfromend AS daysfromend FROM ${stats_db_name}.result_projects, ${stats_db_name}.result, ${stats_db_name}.project WHERE result_projects.id=result.id AND result.type='publication' AND project.id=result_projects.project; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql new file mode 100644 index 000000000..25439852e --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql @@ -0,0 +1,38 @@ +------------------------------------------------------------------------------------------------------ +-- Creating parquet tables from the updated temporary tables and removing unnecessary temporary tables +------------------------------------------------------------------------------------------------------ + +DROP TABLE IF EXISTS ${stats_db_name}.datasource; +CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS SELECT * FROM ${stats_db_name}.datasource_tmp; + +DROP TABLE IF EXISTS ${stats_db_name}.publication; +CREATE TABLE ${stats_db_name}.publication stored AS parquet AS SELECT * FROM ${stats_db_name}.publication_tmp; + +DROP TABLE IF EXISTS ${stats_db_name}.dataset; +CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS SELECT * FROM ${stats_db_name}.dataset_tmp; + +DROP TABLE IF EXISTS ${stats_db_name}.software; +CREATE TABLE ${stats_db_name}.software stored AS parquet AS SELECT * FROM ${stats_db_name}.software_tmp; + +DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct; +CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS SELECT * FROM 
${stats_db_name}.otherresearchproduct_tmp; + +DROP TABLE ${stats_db_name}.project_tmp; +DROP TABLE ${stats_db_name}.datasource_tmp; +DROP TABLE ${stats_db_name}.publication_tmp; +DROP TABLE ${stats_db_name}.dataset_tmp; +DROP TABLE ${stats_db_name}.software_tmp; +DROP TABLE ${stats_db_name}.otherresearchproduct_tmp; + +---------------------------------------------- +-- Re-creating views from final parquet tables +--------------------------------------------- + +-- Result +CREATE OR REPLACE VIEW ${stats_db_name}.result AS SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication UNION ALL SELECT *, bestlicence as access_mode FROM ${stats_db_name}.software UNION ALL SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.dataset UNION ALL SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct; + + +------------------------------------------------------------------------------- +-- To see with Antonis if the following is needed and where it should be placed +------------------------------------------------------------------------------- +CREATE TABLE ${stats_db_name}.numbers_country AS SELECT org.country AS country, count(distinct rd.datasource) AS datasources, count(distinct r.id) AS publications FROM ${stats_db_name}.result r, ${stats_db_name}.result_datasources rd, ${stats_db_name}.datasource d, ${stats_db_name}.datasource_organizations dor, ${stats_db_name}.organization org WHERE r.id=rd.id AND rd.datasource=d.id AND d.id=dor.id AND dor.organization=org.id AND r.type='publication' AND r.bestlicence='Open Access' GROUP BY org.country; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql new file mode 100644 index 000000000..795770313 --- /dev/null +++ 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -0,0 +1,59 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Additional relations +-- +-- Sources related tables/views +------------------------------------------------------ +------------------------------------------------------ +CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources as +SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource +FROM ( + SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource +from ${openaire_db_name}.publication p lateral view explode(p.collectedfrom.key) c as datasource) p +LEFT OUTER JOIN +( + SELECT substr(d.id, 4) id + from ${openaire_db_name}.datasource d + WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as +SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource +FROM ( + SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource +from ${openaire_db_name}.dataset p lateral view explode(p.collectedfrom.key) c as datasource) p +LEFT OUTER JOIN +( + SELECT substr(d.id, 4) id + from ${openaire_db_name}.datasource d + WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as +SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource +FROM ( + SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource +from ${openaire_db_name}.software p lateral view explode(p.collectedfrom.key) c as datasource) p +LEFT OUTER JOIN +( + SELECT substr(d.id, 4) id + from ${openaire_db_name}.datasource d + WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as +SELECT p.id, case when d.id is null then 
'other' else p.datasource end as datasource +FROM ( + SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource +from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.collectedfrom.key) c as datasource) p +LEFT OUTER JOIN +( + SELECT substr(d.id, 4) id + from ${openaire_db_name}.datasource d + WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + +CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS +SELECT * FROM ${stats_db_name}.publication_sources +UNION ALL +SELECT * FROM ${stats_db_name}.dataset_sources +UNION ALL +SELECT * FROM ${stats_db_name}.software_sources +UNION ALL +SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql new file mode 100644 index 000000000..4a56b5d68 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -0,0 +1,49 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Additional relations +-- +-- Licences related tables/views +------------------------------------------------------ +------------------------------------------------------ +CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS +SELECT substr(p.id, 4) as id, licenses.value as type +from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS +SELECT substr(p.id, 4) as id, licenses.value as type +from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses +where licenses.value is not null and 
licenses.value != '' and p.datainfo.deletedbyinference=false; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS +SELECT substr(p.id, 4) as id, licenses.value as type +from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS +SELECT substr(p.id, 4) as id, licenses.value as type +from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses +where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; + +CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS +SELECT * FROM ${stats_db_name}.publication_licenses +UNION ALL +SELECT * FROM ${stats_db_name}.dataset_licenses +UNION ALL +SELECT * FROM ${stats_db_name}.software_licenses +UNION ALL +SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids AS +select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid +from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources as +SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource +FROM ( + SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource + from ${openaire_db_name}.organization o lateral view explode(o.collectedfrom) instances as instance) o + LEFT OUTER JOIN ( + SELECT substr(d.id, 4) id + from ${openaire_db_name}.datasource d + WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql new file mode 100644 index 000000000..60b37048b --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -0,0 +1,36 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Additional relations +-- +-- Refereed related tables/views +------------------------------------------------------ +------------------------------------------------------ + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as +select substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as +select substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as +select substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false; + +CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as +select substr(r.id, 4) as id, inst.refereed.classname as refereed +from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst +where r.datainfo.deletedbyinference=false; + +CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as +select * from ${stats_db_name}.publication_refereed +union all +select * from ${stats_db_name}.dataset_refereed +union all +select * from ${stats_db_name}.software_refereed +union all +select * from 
${stats_db_name}.otherresearchproduct_refereed; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql new file mode 100644 index 000000000..33849b960 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql @@ -0,0 +1,80 @@ +---------------------------------------------------- +-- Shortcuts for various definitions in stats db --- +---------------------------------------------------- + +-- Peer reviewed: +-- Results that have been collected from Crossref +create table ${stats_db_name}.result_peerreviewed as +with peer_reviewed as ( + select distinct r.id as id + from ${stats_db_name}.result r + join ${stats_db_name}.result_sources rs on rs.id=r.id + join ${stats_db_name}.datasource d on d.id=rs.datasource + where d.name='Crossref') +select distinct peer_reviewed.id as id, true as peer_reviewed +from peer_reviewed +union all +select distinct r.id as id, false as peer_reviewed +from ${stats_db_name}.result r +left outer join peer_reviewed pr on pr.id=r.id +where pr.id is null; + +-- Green OA: +-- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal. 
+create table ${stats_db_name}.result_greenoa as
+with result_green as (
+    select distinct r.id as id
+    from ${stats_db_name}.result r
+    join ${stats_db_name}.result_datasources rd on rd.id=r.id
+    join ${stats_db_name}.datasource d on d.id=rd.datasource
+    left outer join (
+        select rd.id from ${stats_db_name}.result_datasources rd
+        join ${stats_db_name}.datasource d on rd.datasource=d.id
+        join ${stats_db_name}.datasource_sources sds on sds.id=d.id
+        join ${stats_db_name}.datasource sd on sd.id=sds.datasource
+        where upper(sd.name)='DOAJ-ARTICLES'
+    ) as doaj on doaj.id=r.id
+    where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null)
+select distinct result_green.id, true as green
+from result_green
+union all
+select distinct r.id as id, false as green
+from ${stats_db_name}.result r
+left outer join result_green rg on rg.id=r.id
+where rg.id is null;
+
+-- GOLD OA:
+-- OA results that have been harvested from a DOAJ journal (the DOAJ datasource name is matched case-insensitively, here and in result_greenoa, so both definitions select the same DOAJ source).
+create table ${stats_db_name}.result_gold as
+with result_gold as (
+    select distinct r.id as id
+    from ${stats_db_name}.result r
+    join ${stats_db_name}.result_datasources rd on rd.id=r.id
+    join ${stats_db_name}.datasource d on d.id=rd.datasource
+    join ${stats_db_name}.datasource_sources sds on sds.id=d.id
+    join ${stats_db_name}.datasource sd on sd.id=sds.datasource
+    where r.type='publication' and r.bestlicence='Open Access' and upper(sd.name)='DOAJ-ARTICLES')
+select distinct result_gold.id, true as gold
+from result_gold
+union all
+select distinct r.id, false as gold
+from ${stats_db_name}.result r
+where r.id not in (select id from result_gold);
+
+-- shortcut result-country through the organization affiliation
+create table ${stats_db_name}.result_affiliated_country as
+select r.id as id, o.country as country
+from ${stats_db_name}.result r
+join ${stats_db_name}.result_organization ro on ro.id=r.id
+join ${stats_db_name}.organization o on o.id=ro.organization
+where o.country is not null
and o.country!=''; + +-- shortcut result-country through datasource of deposition +create table ${stats_db_name}.result_deposited_country as +select r.id as id, o.country as country +from ${stats_db_name}.result r +join ${stats_db_name}.result_datasources rd on rd.id=r.id +join ${stats_db_name}.datasource d on d.id=rd.datasource +join ${stats_db_name}.datasource_organizations dor on dor.id=d.id +join ${stats_db_name}.organization o on o.id=dor.organization +where o.country is not null and o.country!=''; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql new file mode 100644 index 000000000..f737c1ea6 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql @@ -0,0 +1,55 @@ +-- replace the creation of the result view to include the boolean fields from the previous tables (green, gold, +-- peer reviewed) +drop table if exists ${stats_db_name}.result_tmp; +CREATE TABLE ${stats_db_name}.result_tmp ( + id STRING, + title STRING, + publisher STRING, + journal STRING, + `date` STRING, + `year` INT, + bestlicence STRING, + access_mode STRING, + embargo_end_date STRING, + delayed BOOLEAN, + authors INT, + source STRING, + abstract BOOLEAN, + type STRING , + peer_reviewed BOOLEAN, + green BOOLEAN, + gold BOOLEAN) +clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true'); + +insert into ${stats_db_name}.result_tmp +select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold +FROM ${stats_db_name}.publication r +LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id +LEFT OUTER 
JOIN ${stats_db_name}.result_greenoa green on green.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; + +insert into ${stats_db_name}.result_tmp +select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold +FROM ${stats_db_name}.dataset r +LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; + +insert into ${stats_db_name}.result_tmp +select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold +FROM ${stats_db_name}.software r +LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; + +insert into ${stats_db_name}.result_tmp +select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold +FROM ${stats_db_name}.otherresearchproduct r +LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id +LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; + +drop table if exists ${stats_db_name}.result; +drop view if exists ${stats_db_name}.result; +create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp; +drop table ${stats_db_name}.result_tmp; \ No newline at end of file diff --git 
a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql new file mode 100644 index 000000000..ced7bbc11 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql @@ -0,0 +1,32 @@ +------------------------------------------- +--- Extra tables, mostly used by indicators + +create table ${stats_db_name}.result_projectcount as +select r.id, count(distinct p.id) as count +from ${stats_db_name}.result r +left outer join ${stats_db_name}.result_projects rp on rp.id=r.id +left outer join ${stats_db_name}.project p on p.id=rp.project +group by r.id; + +create table ${stats_db_name}.result_fundercount as +select r.id, count(distinct p.funder) as count +from ${stats_db_name}.result r +left outer join ${stats_db_name}.result_projects rp on rp.id=r.id +left outer join ${stats_db_name}.project p on p.id=rp.project +group by r.id; + +create table ${stats_db_name}.project_resultcount as +with rcount as ( + select p.id as pid, count(distinct r.id) as `count`, r.type as type + from ${stats_db_name}.project p + left outer join ${stats_db_name}.result_projects rp on rp.project=p.id + left outer join ${stats_db_name}.result r on r.id=rp.id + group by r.type, p.id ) +select rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications, + sum(case when rcount.type='dataset' then rcount.count else 0 end) as datasets, + sum(case when rcount.type='software' then rcount.count else 0 end) as software, + sum(case when rcount.type='other' then rcount.count else 0 end) as other +from rcount +group by rcount.pid; + +create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture \ No newline at end of file diff --git 
a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql new file mode 100644 index 000000000..5c102d014 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql @@ -0,0 +1,207 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Shadow schema table exchange +------------------------------------------------------ +------------------------------------------------------ + +-- Dropping old views +DROP VIEW IF EXISTS ${stats_db_shadow_name}.category; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.concept; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.context; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.country; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.countrygdp; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.creation_date; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_citations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_classifications; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_concepts; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_datasources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_languages; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_licenses; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_oids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_pids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_refereed; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_topics; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_languages; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_oids; +DROP VIEW IF EXISTS 
${stats_db_shadow_name}.datasource_organizations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_results; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.funder; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.fundref; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.numbers_country; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_datasources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_pids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_projects; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.project; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_oids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_organizations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_resultcount; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results_publication; +DROP VIEW IF EXISTS 
${stats_db_shadow_name}.publication; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_citations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_classifications; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_concepts; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_datasources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_languages; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_licenses; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_oids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_pids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_refereed; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_topics; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_affiliated_country; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_citations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_classifications; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_concepts; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_datasources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_deposited_country; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_fundercount; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_gold; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_greenoa; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_languages; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_licenses; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_oids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_organization; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_peerreviewed; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_pids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projectcount; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projects; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_refereed; +DROP VIEW IF EXISTS 
${stats_db_shadow_name}.result_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_topics; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.rndexpediture; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.roarmap; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_citations; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_classifications; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_concepts; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_datasources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_languages; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_licenses; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_oids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_pids; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_refereed; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_sources; +DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_topics; + + +-- Creating the shadow database, in case it doesn't exist +CREATE database IF NOT EXISTS ${stats_db_shadow_name}; + +-- Creating new views +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.category AS SELECT * FROM ${stats_db_name}.category; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.concept AS SELECT * FROM ${stats_db_name}.concept; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.context AS SELECT * FROM ${stats_db_name}.context; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.country AS SELECT * FROM ${stats_db_name}.country; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations; +CREATE VIEW IF NOT 
EXISTS ${stats_db_shadow_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_concepts AS SELECT * FROM ${stats_db_name}.dataset_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_datasources AS SELECT * FROM ${stats_db_name}.dataset_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_languages AS SELECT * FROM ${stats_db_name}.dataset_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_languages AS SELECT * FROM ${stats_db_name}.datasource_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.funder AS SELECT * FROM 
${stats_db_name}.funder; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization AS SELECT * FROM ${stats_db_name}.organization; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts AS SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages AS SELECT * FROM ${stats_db_name}.otherresearchproduct_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids AS SELECT * FROM 
${stats_db_name}.otherresearchproduct_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project AS SELECT * FROM ${stats_db_name}.project; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication AS SELECT * FROM ${stats_db_name}.publication; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources; +CREATE 
VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_languages AS SELECT * FROM ${stats_db_name}.publication_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result AS SELECT * FROM ${stats_db_name}.result; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold; +CREATE VIEW IF NOT EXISTS 
${stats_db_shadow_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.roarmap AS SELECT * FROM ${stats_db_name}.roarmap; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software AS SELECT * FROM ${stats_db_name}.software; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_citations AS SELECT * FROM ${stats_db_name}.software_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_classifications AS SELECT * FROM ${stats_db_name}.software_classifications; +CREATE VIEW IF NOT EXISTS 
${stats_db_shadow_name}.software_concepts AS SELECT * FROM ${stats_db_name}.software_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_datasources AS SELECT * FROM ${stats_db_name}.software_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_languages AS SELECT * FROM ${stats_db_name}.software_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql new file mode 100644 index 000000000..34e48a18a --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql @@ -0,0 +1,8 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Impala table statistics - Needed to make the tables +-- visible for impala +------------------------------------------------------ +------------------------------------------------------ + +INVALIDATE METADATA ${stats_db_name}; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql new file mode 100644 index 000000000..34e48a18a --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql @@ -0,0 +1,8 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Impala table statistics - Needed to make the tables +-- visible for impala +------------------------------------------------------ +------------------------------------------------------ + +INVALIDATE METADATA ${stats_db_name}; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql new file mode 100644 index 000000000..ba0db25be --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -0,0 +1,44 @@ +-------------------------------------------------------------- +-------------------------------------------------------------- +-- Publication table/view and Publication related tables/views +-------------------------------------------------------------- +-------------------------------------------------------------- + +-- Publication temporary table +DROP TABLE IF EXISTS ${stats_db_name}.publication_tmp; + +CREATE TABLE ${stats_db_name}.publication_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true'); + +INSERT INTO ${stats_db_name}.publication_tmp SELECT substr(p.id, 4) as id, p.title[0].value as title, p.publisher.value as publisher, p.journal.name as journal , 
+p.dateofacceptance.value as date, date_format(p.dateofacceptance.value,'yyyy') as year, p.bestaccessright.classname as bestlicence, +p.embargoenddate.value as embargo_end_date, false as delayed, size(p.author) as authors , concat_ws('\u003B',p.source.value) as source, +case when size(p.description) > 0 then true else false end as abstract, +'publication' as type +from ${openaire_db_name}.publication p +where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.publication_datasources as +SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource + FROM ( + SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource + from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance + where p.datainfo.deletedbyinference=false ) p + LEFT OUTER JOIN ( + SELECT substr(d.id, 4) id + from ${openaire_db_name}.datasource d + WHERE d.datainfo.deletedbyinference=false ) d on p.datasource = d.id; + +CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; + +CREATE TABLE 
${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; + +-- Publication_citations +CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and p.datainfo.deletedbyinference=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql new file mode 100644 index 000000000..f69715a31 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -0,0 +1,36 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Dataset table/view and Dataset related tables/views +------------------------------------------------------ +------------------------------------------------------ + +-- Dataset temporary table supporting updates +DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp; +CREATE TABLE ${stats_db_name}.dataset_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source 
STRING, abstract BOOLEAN, type STRING ) clustered by (id) into 100 buckets stored AS orc tblproperties('transactional'='true'); + +INSERT INTO ${stats_db_name}.dataset_tmp SELECT substr(d.id, 4) AS id, d.title[0].value AS title, d.publisher.value AS publisher, cast(null AS string) AS journal, +d.dateofacceptance.value as date, date_format(d.dateofacceptance.value,'yyyy') AS year, d.bestaccessright.classname AS bestlicence, +d.embargoenddate.value AS embargo_end_date, false AS delayed, size(d.author) AS authors , concat_ws('\u003B',d.source.value) AS source, + CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, +'dataset' AS type +FROM ${openaire_db_name}.dataset d +WHERE d.datainfo.deletedbyinference=FALSE; + +-- Dataset_citations +CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and d.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource +FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance where 
p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN +(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id; + +CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql new file mode 100644 index 000000000..2c4a625e1 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -0,0 +1,36 @@ +-------------------------------------------------------- +-------------------------------------------------------- +-- Software table/view and Software related tables/views +-------------------------------------------------------- +-------------------------------------------------------- + +-- Software temporary table supporting updates +DROP TABLE IF EXISTS ${stats_db_name}.software_tmp; +CREATE TABLE 
${stats_db_name}.software_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); + +INSERT INTO ${stats_db_name}.software_tmp SELECT substr(s.id, 4) as id, s.title[0].value AS title, s.publisher.value AS publisher, CAST(NULL AS string) AS journal, +s.dateofacceptance.value AS DATE, date_format(s.dateofacceptance.value,'yyyy') AS YEAR, s.bestaccessright.classname AS bestlicence, +s.embargoenddate.value AS embargo_end_date, FALSE AS delayed, SIZE(s.author) AS authors , concat_ws('\u003B',s.source.value) AS source, + CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, +'software' as type +from ${openaire_db_name}.software s +where s.datainfo.deletedbyinference=false; + +-- Software_citations +CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and s.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as 
datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource +FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN +(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id; + +CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql new file mode 100644 index 000000000..1fa5df8cb --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -0,0 +1,36 @@ +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +-- Otherresearchproduct table/view and 
Otherresearchproduct related tables/views +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- + +-- Otherresearchproduct temporary table supporting updates +DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp; +CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp ( id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); + +INSERT INTO ${stats_db_name}.otherresearchproduct_tmp SELECT substr(o.id, 4) AS id, o.title[0].value AS title, o.publisher.value AS publisher, CAST(NULL AS string) AS journal, +o.dateofacceptance.value AS DATE, date_format(o.dateofacceptance.value,'yyyy') AS year, o.bestaccessright.classname AS bestlicence, +o.embargoenddate.value as embargo_end_date, FALSE AS delayed, SIZE(o.author) AS authors , concat_ws('\u003B',o.source.value) AS source, +CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, +'other' AS type +FROM ${openaire_db_name}.otherresearchproduct o +WHERE o.datainfo.deletedbyinference=FALSE; + +-- Otherresearchproduct_citations +CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and o.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS 
instancetype where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource +from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN +(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; + +CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; diff --git 
a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql new file mode 100644 index 000000000..21a944164 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -0,0 +1,30 @@ +-- noinspection SqlNoDataSourceInspectionForFile + +------------------------------------------------------ +------------------------------------------------------ +-- Project table/view and Project related tables/views +------------------------------------------------------ +------------------------------------------------------ +-- Project_oids Table +DROP TABLE IF EXISTS ${stats_db_name}.project_oids; +CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; + +-- Project_organizations Table +DROP TABLE IF EXISTS ${stats_db_name}.project_organizations; +CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization'; + +-- Project_results Table +DROP TABLE IF EXISTS ${stats_db_name}.project_results; +CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result FROM ${openaire_db_name}.relation r WHERE r.reltype='resultProject' and r.datainfo.deletedbyinference=false; + +-- Project table +---------------- +-- Creating and populating temporary Project table +DROP TABLE IF EXISTS ${stats_db_name}.project_tmp; +CREATE TABLE ${stats_db_name}.project_tmp (id STRING, acronym STRING, title STRING, funder STRING, funding_lvl0 STRING, funding_lvl1 STRING, funding_lvl2 STRING, ec39 STRING, type STRING, startdate STRING, enddate STRING, start_year INT, end_year INT, 
duration INT, haspubs STRING, numpubs INT, daysforlastpub INT, delayedpubs INT, callidentifier STRING, code STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); + +INSERT INTO ${stats_db_name}.project_tmp SELECT substr(p.id, 4) AS id, p.acronym.value AS acronym, p.title.value AS title, xpath_string(p.fundingtree[0].value, '//funder/name') AS funder, xpath_string(p.fundingtree[0].value, '//funding_level_0/name') AS funding_lvl0, xpath_string(p.fundingtree[0].value, '//funding_level_1/name') AS funding_lvl1, xpath_string(p.fundingtree[0].value, '//funding_level_2/name') AS funding_lvl2, p.ecsc39.value AS ec39, p.contracttype.classname AS type, p.startdate.value AS startdate, p.enddate.value AS enddate, year(p.startdate.value) AS start_year, year(p.enddate.value) AS end_year, CAST(MONTHS_BETWEEN(p.enddate.value, p.startdate.value) AS INT) AS duration, 'no' AS haspubs, 0 AS numpubs, 0 AS daysforlastpub, 0 AS delayedpubs, p.callidentifier.value AS callidentifier, p.code.value AS code FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference=false; + +create table ${stats_db_name}.funder as +select distinct xpath_string(fund, '//funder/id') as id, xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/shortname') as shortname +from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql new file mode 100644 index 000000000..7acabf1dd --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -0,0 +1,31 @@ +---------------------------------------------------- +---------------------------------------------------- +-- Result table/view and Result related 
tables/views +---------------------------------------------------- +---------------------------------------------------- + +-- Views on temporary tables that should be re-created in the end +CREATE OR REPLACE VIEW ${stats_db_name}.result as SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.software_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.dataset_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct_tmp; + +-- Views on final tables +CREATE OR REPLACE VIEW ${stats_db_name}.result_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources UNION ALL SELECT * FROM ${stats_db_name}.software_datasources UNION ALL SELECT * FROM ${stats_db_name}.dataset_datasources UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_citations AS SELECT * FROM ${stats_db_name}.publication_citations UNION ALL SELECT * FROM ${stats_db_name}.software_citations UNION ALL SELECT * FROM ${stats_db_name}.dataset_citations UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_citations; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications UNION ALL SELECT * FROM ${stats_db_name}.software_classifications UNION ALL SELECT * FROM ${stats_db_name}.dataset_classifications UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts UNION ALL SELECT * FROM ${stats_db_name}.software_concepts UNION ALL SELECT * FROM ${stats_db_name}.dataset_concepts UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_languages AS SELECT * FROM ${stats_db_name}.publication_languages UNION ALL SELECT * 
FROM ${stats_db_name}.software_languages UNION ALL SELECT * FROM ${stats_db_name}.dataset_languages UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_languages; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_oids AS SELECT * FROM ${stats_db_name}.publication_oids UNION ALL SELECT * FROM ${stats_db_name}.software_oids UNION ALL SELECT * FROM ${stats_db_name}.dataset_oids UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_oids; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS SELECT * FROM ${stats_db_name}.publication_pids UNION ALL SELECT * FROM ${stats_db_name}.software_pids UNION ALL SELECT * FROM ${stats_db_name}.dataset_pids UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_pids; + +CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS SELECT * FROM ${stats_db_name}.publication_topics UNION ALL SELECT * FROM ${stats_db_name}.software_topics UNION ALL SELECT * FROM ${stats_db_name}.dataset_topics UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; + +DROP TABLE IF EXISTS ${stats_db_name}.result_organization; +CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization'; + +DROP TABLE IF EXISTS ${stats_db_name}.result_projects; +CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id=pr.result JOIN ${stats_db_name}.project_tmp p ON p.id=pr.id; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql new file mode 100644 index 000000000..4e13b3dd8 --- /dev/null +++ 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -0,0 +1,58 @@ +-- noinspection SqlNoDataSourceInspectionForFile + +------------------------------------------------------------ +------------------------------------------------------------ +-- Datasource table/view and Datasource related tables/views +------------------------------------------------------------ +------------------------------------------------------------ + +-- Datasource table creation & update +------------------------------------- +-- Creating and populating temporary datasource table +DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp; +CREATE TABLE ${stats_db_name}.datasource_tmp(`id` string, `name` STRING, `type` STRING, `dateofvalidation` STRING, `yearofvalidation` string, `harvested` BOOLEAN, `piwik_id` INT, `latitude` STRING, `longitude`STRING, `websiteurl` STRING, `compatibility` STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); + +-- Insert statement that takes into account the piwik_id of the openAIRE graph +INSERT INTO ${stats_db_name}.datasource_tmp +SELECT substr(d1.id, 4) AS id, officialname.value AS name, +datasourcetype.classname AS type, dateofvalidation.value AS dateofvalidation, date_format(d1.dateofvalidation.value,'yyyy') AS yearofvalidation, +FALSE AS harvested, +CASE WHEN d2.piwik_id IS NULL THEN 0 ELSE d2.piwik_id END AS piwik_id, +d1.latitude.value AS latitude, d1.longitude.value AS longitude, +d1.websiteurl.value AS websiteurl, d1.openairecompatibility.classid AS compatibility +FROM ${openaire_db_name}.datasource d1 +LEFT OUTER JOIN +(SELECT id, split(originalidd, '\\:')[1] as piwik_id +FROM ${openaire_db_name}.datasource +LATERAL VIEW EXPLODE(originalid) temp AS originalidd +WHERE originalidd like "piwik:%") AS d2 +ON d1.id = d2.id +WHERE d1.datainfo.deletedbyinference=FALSE; + +-- Updating temporary table with everything that is not based on results -> This 
is done with the following "dual" table. +-- Creating a temporary dual table that will be removed after the following insert +CREATE TABLE ${stats_db_name}.dual(dummy CHAR(1)); +INSERT INTO ${stats_db_name}.dual VALUES('X'); +INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`, `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`) +SELECT 'other', 'Other', 'Repository', NULL, NULL, false, 0, NULL, NULL, NULL, 'unknown' FROM ${stats_db_name}.dual WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name='Unknown Repository'); +DROP TABLE ${stats_db_name}.dual; + +UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name='Unknown Repository'; +UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation='-1'; + +DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages; +CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) AS id, langs.languages AS language FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; + +DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids; +CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; + +DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations; +CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization'; + +-- datasource sources: +-- where the datasource info have been collected from. 
+create table if not exists ${stats_db_name}.datasource_sources AS select substr(d.id,4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf where d.datainfo.deletedbyinference=false; + +CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result FROM ${stats_db_name}.result_datasources; + + diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql new file mode 100644 index 000000000..a918e4de4 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -0,0 +1,12 @@ +---------------------------------------------------------------- +---------------------------------------------------------------- +-- Organization table/view and Organization related tables/views +---------------------------------------------------------------- +---------------------------------------------------------------- +DROP TABLE IF EXISTS ${stats_db_name}.organization; +CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization AS SELECT substr(o.id, 4) as id, o.legalname.value as name, o.legalshortname.value as legalshortname, o.country.classid as country +FROM ${openaire_db_name}.organization o WHERE o.datainfo.deletedbyinference=FALSE; + +CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource FROM ${stats_db_name}.datasource_organizations; + +CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS SELECT id AS project, organization as id FROM ${stats_db_name}.project_organizations; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml new file mode 100644 index 000000000..324e6f9a1 --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -0,0 +1,302 @@ + + + + stats_db_name + the target stats database name + + + stats_db_shadow_name + the name of the shadow schema + + + stats_db_production_name + the name of the production schema + + + hive_metastore_uris + hive server metastore URIs + + + hive_jdbc_url + hive server jdbc url + + + hive_timeout + the time period, in seconds, after which Hive fails a transaction if a Hive client has not sent a heartbeat. The default value is 300 seconds. + + + + + ${jobTracker} + ${nameNode} + + + hive.metastore.uris + ${hive_metastore_uris} + + + hive.txn.timeout + ${hive_timeout} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + 
openaire_db_name=${openaire_db_name} + external_stats_db_name=${external_stats_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + external_stats_db_name=${external_stats_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + stats_db_shadow_name=${stats_db_shadow_name} + + + + + + + + ${jobTracker} + ${nameNode} + impala-shell.sh + ${stats_db_name} + step18.sql + ${wf:appPath()}/scripts/step18.sql + impala-shell.sh + + + + + + + + ${jobTracker} + ${nameNode} + impala-shell.sh + ${stats_db_shadow_name} + step19.sql + ${wf:appPath()}/scripts/step19.sql + impala-shell.sh + + + + + + + + ${jobTracker} + ${nameNode} + updateCache.sh + ${stats_tool_api_url} + updateCache.sh + + + + + + + From 413afcfed5ef1279b2d995cb23329fdbee899055 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 2 Dec 2020 15:57:17 +0200 Subject: [PATCH 088/108] finished first implementation of wf --- ...{step18.sql => computeProductionStats.sql} | 0 .../graph/stats/oozie_app/scripts/step1.sql | 8 - .../graph/stats/oozie_app/scripts/step10.sql | 21 -- .../graph/stats/oozie_app/scripts/step11.sql | 44 ---- .../graph/stats/oozie_app/scripts/step12.sql | 38 --- 
.../graph/stats/oozie_app/scripts/step13.sql | 59 ----- .../graph/stats/oozie_app/scripts/step14.sql | 49 ---- .../graph/stats/oozie_app/scripts/step15.sql | 36 --- .../graph/stats/oozie_app/scripts/step16.sql | 80 ------ .../stats/oozie_app/scripts/step16_5.sql | 55 ---- .../stats/oozie_app/scripts/step16_6.sql | 32 --- .../graph/stats/oozie_app/scripts/step17.sql | 207 --------------- .../graph/stats/oozie_app/scripts/step19.sql | 8 - .../graph/stats/oozie_app/scripts/step2.sql | 44 ---- .../graph/stats/oozie_app/scripts/step3.sql | 36 --- .../graph/stats/oozie_app/scripts/step4.sql | 36 --- .../graph/stats/oozie_app/scripts/step5.sql | 36 --- .../graph/stats/oozie_app/scripts/step6.sql | 30 --- .../graph/stats/oozie_app/scripts/step7.sql | 31 --- .../graph/stats/oozie_app/scripts/step8.sql | 58 ---- .../graph/stats/oozie_app/scripts/step9.sql | 12 - .../scripts/updateProductionViews.sql | 207 +++++++++++++++ .../dhp/oa/graph/stats/oozie_app/workflow.xml | 247 +----------------- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 6 +- 24 files changed, 224 insertions(+), 1156 deletions(-) rename dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/{step18.sql => computeProductionStats.sql} (100%) delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql delete mode 100644 
dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql delete mode 100644 dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql create mode 100644 
dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/updateProductionViews.sql diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/computeProductionStats.sql similarity index 100% rename from dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step18.sql rename to dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/computeProductionStats.sql diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql deleted file mode 100644 index 9697a1dc8..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step1.sql +++ /dev/null @@ -1,8 +0,0 @@ --------------------------------------------------------------- --------------------------------------------------------------- --- Stats database creation --------------------------------------------------------------- --------------------------------------------------------------- - -DROP database IF EXISTS ${stats_db_name} CASCADE; -CREATE database ${stats_db_name}; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql deleted file mode 100644 index 46ff295f4..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ /dev/null @@ -1,21 +0,0 @@ ------------------------------------------------------------------------------------------------- 
------------------------------------------------------------------------------------------------- --- Tables/views from external tables/views (Fundref, Country, CountyGDP, roarmap, rndexpediture) ------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------- -CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS SELECT * FROM ${external_stats_db_name}.fundref; -CREATE OR REPLACE VIEW ${stats_db_name}.country AS SELECT * FROM ${external_stats_db_name}.country; -CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS SELECT * FROM ${external_stats_db_name}.countrygdp; -CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS SELECT * FROM ${external_stats_db_name}.roarmap; -CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS SELECT * FROM ${external_stats_db_name}.rndexpediture; -CREATE OR REPLACE VIEW ${stats_db_name}.context AS SELECT * FROM ${external_stats_db_name}.context; -CREATE OR REPLACE VIEW ${stats_db_name}.category AS SELECT * FROM ${external_stats_db_name}.category; -CREATE OR REPLACE VIEW ${stats_db_name}.concept AS SELECT * FROM ${external_stats_db_name}.concept; - - ------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------- --- Creation date of the database ------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------- -create table ${stats_db_name}.creation_date as select date_format(current_date(), 'dd-MM-yyyy') as date; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql deleted file mode 100644 index 13e141459..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql +++ /dev/null @@ -1,44 +0,0 @@ ----------------------------------------------------------------- ----------------------------------------------------------------- --- Post processing - Updates on main tables ----------------------------------------------------------------- ----------------------------------------------------------------- - ---Datasource temporary table updates -UPDATE ${stats_db_name}.datasource_tmp SET harvested='true' WHERE datasource_tmp.id IN (SELECT DISTINCT d.id FROM ${stats_db_name}.datasource_tmp d, ${stats_db_name}.result_datasources rd WHERE d.id=rd.datasource); - --- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables -UPDATE ${stats_db_name}.project_tmp SET haspubs='yes' WHERE project_tmp.id IN (SELECT pr.id FROM ${stats_db_name}.project_results pr, ${stats_db_name}.result r WHERE pr.result=r.id AND r.type='publication'); - -DROP TABLE IF EXISTS ${stats_db_name}.project; -CREATE TABLE ${stats_db_name}.project stored as parquet as -SELECT p.id , p.acronym, p.title, p.funder, p.funding_lvl0, p.funding_lvl1, p.funding_lvl2, p.ec39, p.type, p.startdate, p.enddate, p.start_year, p.end_year, p.duration, -CASE WHEN prr1.id IS NULL THEN 'no' ELSE 'yes' END AS haspubs, -CASE WHEN prr1.id IS NULL THEN 0 ELSE prr1.np END AS numpubs, -CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub, -CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs, -p.callidentifier, p.code -FROM ${stats_db_name}.project_tmp p -LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np - FROM ${stats_db_name}.project_results pr INNER JOIN ${stats_db_name}.result r ON pr.result=r.id - WHERE 
r.type='publication' - GROUP BY pr.id) AS prr1 on prr1.id = p.id -LEFT JOIN (SELECT pp.id, max(datediff(to_date(r.date), to_date(pp.enddate)) ) AS daysForlastPub , count(distinct r.id) AS dp - FROM ${stats_db_name}.project_tmp pp, ${stats_db_name}.project_results pr, ${stats_db_name}.result r - WHERE pp.id=pr.id AND pr.result=r.id AND r.type='publication' AND datediff(to_date(r.date), to_date(pp.enddate)) > 0 - GROUP BY pp.id) AS prr2 - ON prr2.id = p.id; - --- Publication temporary table updates -UPDATE ${stats_db_name}.publication_tmp SET delayed = 'yes' WHERE publication_tmp.id IN (SELECT distinct r.id FROM stats_wf_db_obs.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); - --- Dataset temporary table updates -UPDATE ${stats_db_name}.dataset_tmp SET delayed = 'yes' WHERE dataset_tmp.id IN (SELECT distinct r.id FROM stats_wf_db_obs.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); - --- Software temporary table updates -UPDATE ${stats_db_name}.software_tmp SET delayed = 'yes' WHERE software_tmp.id IN (SELECT distinct r.id FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); - --- Oherresearchproduct temporary table updates -UPDATE ${stats_db_name}.otherresearchproduct_tmp SET delayed = 'yes' WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id FROM ${stats_db_name}.result r, ${stats_db_name}.project_results pr, ${stats_db_name}.project_tmp p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date)-to_date(p.enddate) > 0); - -CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS SELECT result_projects.id AS result, result_projects.project AS project_results, result.date as resultdate, project.enddate as 
projectenddate, result_projects.daysfromend AS daysfromend FROM ${stats_db_name}.result_projects, ${stats_db_name}.result, ${stats_db_name}.project WHERE result_projects.id=result.id AND result.type='publication' AND project.id=result_projects.project; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql deleted file mode 100644 index 25439852e..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step12.sql +++ /dev/null @@ -1,38 +0,0 @@ ------------------------------------------------------------------------------------------------------- --- Creating parquet tables from the updated temporary tables and removing unnecessary temporary tables ------------------------------------------------------------------------------------------------------- - -DROP TABLE IF EXISTS ${stats_db_name}.datasource; -CREATE TABLE ${stats_db_name}.datasource stored AS parquet AS SELECT * FROM ${stats_db_name}.datasource_tmp; - -DROP TABLE IF EXISTS ${stats_db_name}.publication; -CREATE TABLE ${stats_db_name}.publication stored AS parquet AS SELECT * FROM ${stats_db_name}.publication_tmp; - -DROP TABLE IF EXISTS ${stats_db_name}.dataset; -CREATE TABLE ${stats_db_name}.dataset stored AS parquet AS SELECT * FROM ${stats_db_name}.dataset_tmp; - -DROP TABLE IF EXISTS ${stats_db_name}.software; -CREATE TABLE ${stats_db_name}.software stored AS parquet AS SELECT * FROM ${stats_db_name}.software_tmp; - -DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct; -CREATE TABLE ${stats_db_name}.otherresearchproduct stored AS parquet AS SELECT * FROM ${stats_db_name}.otherresearchproduct_tmp; - -DROP TABLE ${stats_db_name}.project_tmp; -DROP TABLE ${stats_db_name}.datasource_tmp; -DROP TABLE ${stats_db_name}.publication_tmp; -DROP TABLE 
${stats_db_name}.dataset_tmp; -DROP TABLE ${stats_db_name}.software_tmp; -DROP TABLE ${stats_db_name}.otherresearchproduct_tmp; - ----------------------------------------------- --- Re-creating views from final parquet tables ---------------------------------------------- - --- Result -CREATE OR REPLACE VIEW ${stats_db_name}.result AS SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication UNION ALL SELECT *, bestlicence as access_mode FROM ${stats_db_name}.software UNION ALL SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.dataset UNION ALL SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct; - - -------------------------------------------------------------------------------- --- To see with Antonis if the following is needed and where it should be placed -------------------------------------------------------------------------------- -CREATE TABLE ${stats_db_name}.numbers_country AS SELECT org.country AS country, count(distinct rd.datasource) AS datasources, count(distinct r.id) AS publications FROM ${stats_db_name}.result r, ${stats_db_name}.result_datasources rd, ${stats_db_name}.datasource d, ${stats_db_name}.datasource_organizations dor, ${stats_db_name}.organization org WHERE r.id=rd.id AND rd.datasource=d.id AND d.id=dor.id AND dor.organization=org.id AND r.type='publication' AND r.bestlicence='Open Access' GROUP BY org.country; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql deleted file mode 100644 index 795770313..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ /dev/null @@ -1,59 +0,0 @@ ------------------------------------------------------- ------------------------------------------------------- --- Additional relations --- --- Sources 
related tables/views ------------------------------------------------------- ------------------------------------------------------- -CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources as -SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource -FROM ( - SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource -from ${openaire_db_name}.publication p lateral view explode(p.collectedfrom.key) c as datasource) p -LEFT OUTER JOIN -( - SELECT substr(d.id, 4) id - from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as -SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource -FROM ( - SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource -from ${openaire_db_name}.dataset p lateral view explode(p.collectedfrom.key) c as datasource) p -LEFT OUTER JOIN -( - SELECT substr(d.id, 4) id - from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as -SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource -FROM ( - SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource -from ${openaire_db_name}.software p lateral view explode(p.collectedfrom.key) c as datasource) p -LEFT OUTER JOIN -( - SELECT substr(d.id, 4) id - from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as -SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource -FROM ( - SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource -from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.collectedfrom.key) c as datasource) p -LEFT OUTER JOIN -( - SELECT substr(d.id, 4) id - from 
${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; - -CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS -SELECT * FROM ${stats_db_name}.publication_sources -UNION ALL -SELECT * FROM ${stats_db_name}.dataset_sources -UNION ALL -SELECT * FROM ${stats_db_name}.software_sources -UNION ALL -SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql deleted file mode 100644 index 4a56b5d68..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ /dev/null @@ -1,49 +0,0 @@ ------------------------------------------------------- ------------------------------------------------------- --- Additional relations --- --- Licences related tables/views ------------------------------------------------------- ------------------------------------------------------- -CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS -SELECT substr(p.id, 4) as id, licenses.value as type -from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS -SELECT substr(p.id, 4) as id, licenses.value as type -from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS -SELECT substr(p.id, 4) as id, licenses.value as type -from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as 
licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS -SELECT substr(p.id, 4) as id, licenses.value as type -from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses -where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; - -CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS -SELECT * FROM ${stats_db_name}.publication_licenses -UNION ALL -SELECT * FROM ${stats_db_name}.dataset_licenses -UNION ALL -SELECT * FROM ${stats_db_name}.software_licenses -UNION ALL -SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids AS -select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid -from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources as -SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource -FROM ( - SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource - from ${openaire_db_name}.organization o lateral view explode(o.collectedfrom) instances as instance) o - LEFT OUTER JOIN ( - SELECT substr(d.id, 4) id - from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql deleted file mode 100644 index 60b37048b..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ /dev/null @@ -1,36 +0,0 @@ 
------------------------------------------------------- ------------------------------------------------------- --- Additional relations --- --- Refereed related tables/views ------------------------------------------------------- ------------------------------------------------------- - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; - -CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as -select substr(r.id, 4) as id, inst.refereed.classname as refereed -from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst -where r.datainfo.deletedbyinference=false; - -CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as -select * from ${stats_db_name}.publication_refereed -union all -select * from ${stats_db_name}.dataset_refereed -union all -select * from ${stats_db_name}.software_refereed -union all -select * from ${stats_db_name}.otherresearchproduct_refereed; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql deleted file mode 100644 index 33849b960..000000000 --- 
a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql +++ /dev/null @@ -1,80 +0,0 @@ ----------------------------------------------------- --- Shortcuts for various definitions in stats db --- ----------------------------------------------------- - --- Peer reviewed: --- Results that have been collected from Crossref -create table ${stats_db_name}.result_peerreviewed as -with peer_reviewed as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_sources rs on rs.id=r.id - join ${stats_db_name}.datasource d on d.id=rs.datasource - where d.name='Crossref') -select distinct peer_reviewed.id as id, true as peer_reviewed -from peer_reviewed -union all -select distinct r.id as id, false as peer_reviewed -from ${stats_db_name}.result r -left outer join peer_reviewed pr on pr.id=r.id -where pr.id is null; - --- Green OA: --- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal. 
-create table ${stats_db_name}.result_greenoa as -with result_green as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_datasources rd on rd.id=r.id - join ${stats_db_name}.datasource d on d.id=rd.datasource - left outer join ( - select rd.id from ${stats_db_name}.result_datasources rd - join ${stats_db_name}.datasource d on rd.datasource=d.id - join ${stats_db_name}.datasource_sources sds on sds.id=d.id - join ${stats_db_name}.datasource sd on sd.id=sds.datasource - where sd.name='DOAJ-ARTICLES' - ) as doaj on doaj.id=r.id - where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null) -select distinct result_green.id, true as green -from result_green -union all -select distinct r.id as id, false as green -from ${stats_db_name}.result r -left outer join result_green rg on rg.id=r.id -where rg.id is null; - --- GOLD OA: --- OA results that have been harvested from a DOAJ journal. -create table ${stats_db_name}.result_gold as -with result_gold as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_datasources rd on rd.id=r.id - join ${stats_db_name}.datasource d on d.id=rd.datasource - join ${stats_db_name}.datasource_sources sds on sds.id=d.id - join ${stats_db_name}.datasource sd on sd.id=sds.datasource - where r.type='publication' and r.bestlicence='Open Access' and sd.name='DOAJ-Articles') -select distinct result_gold.id, true as gold -from result_gold -union all -select distinct r.id, false as gold -from ${stats_db_name}.result r -where r.id not in (select id from result_gold); - --- shortcut result-country through the organization affiliation -create table ${stats_db_name}.result_affiliated_country as -select r.id as id, o.country as country -from ${stats_db_name}.result r -join ${stats_db_name}.result_organization ro on ro.id=r.id -join ${stats_db_name}.organization o on o.id=ro.organization -where o.country is not null 
and o.country!=''; - --- shortcut result-country through datasource of deposition -create table ${stats_db_name}.result_deposited_country as -select r.id as id, o.country as country -from ${stats_db_name}.result r -join ${stats_db_name}.result_datasources rd on rd.id=r.id -join ${stats_db_name}.datasource d on d.id=rd.datasource -join ${stats_db_name}.datasource_organizations dor on dor.id=d.id -join ${stats_db_name}.organization o on o.id=dor.organization -where o.country is not null and o.country!=''; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql deleted file mode 100644 index f737c1ea6..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_5.sql +++ /dev/null @@ -1,55 +0,0 @@ --- replace the creation of the result view to include the boolean fields from the previous tables (green, gold, --- peer reviewed) -drop table if exists ${stats_db_name}.result_tmp; -CREATE TABLE ${stats_db_name}.result_tmp ( - id STRING, - title STRING, - publisher STRING, - journal STRING, - `date` STRING, - `year` INT, - bestlicence STRING, - access_mode STRING, - embargo_end_date STRING, - delayed BOOLEAN, - authors INT, - source STRING, - abstract BOOLEAN, - type STRING , - peer_reviewed BOOLEAN, - green BOOLEAN, - gold BOOLEAN) -clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true'); - -insert into ${stats_db_name}.result_tmp -select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold -FROM ${stats_db_name}.publication r -LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id -LEFT 
OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; - -insert into ${stats_db_name}.result_tmp -select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold -FROM ${stats_db_name}.dataset r -LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; - -insert into ${stats_db_name}.result_tmp -select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold -FROM ${stats_db_name}.software r -LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; - -insert into ${stats_db_name}.result_tmp -select r.id, r.title, r.publisher, r.journal, r.`date`, date_format(r.`date`, 'yyyy'), r.bestlicence, r.bestlicence, r.embargo_end_date, r.delayed, r.authors, r.source, r.abstract, r.type, pr.peer_reviewed, green.green, gold.gold -FROM ${stats_db_name}.otherresearchproduct r -LEFT OUTER JOIN ${stats_db_name}.result_peerreviewed pr on pr.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_greenoa green on green.id=r.id -LEFT OUTER JOIN ${stats_db_name}.result_gold gold on gold.id=r.id; - -drop table if exists ${stats_db_name}.result; -drop view if exists ${stats_db_name}.result; -create table ${stats_db_name}.result stored as parquet as select * from ${stats_db_name}.result_tmp; -drop table ${stats_db_name}.result_tmp; \ No newline at end of file diff --git 
a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql deleted file mode 100644 index ced7bbc11..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql +++ /dev/null @@ -1,32 +0,0 @@ -------------------------------------------- ---- Extra tables, mostly used by indicators - -create table ${stats_db_name}.result_projectcount as -select r.id, count(distinct p.id) as count -from ${stats_db_name}.result r -left outer join ${stats_db_name}.result_projects rp on rp.id=r.id -left outer join ${stats_db_name}.project p on p.id=rp.project -group by r.id; - -create table ${stats_db_name}.result_fundercount as -select r.id, count(distinct p.funder) as count -from ${stats_db_name}.result r -left outer join ${stats_db_name}.result_projects rp on rp.id=r.id -left outer join ${stats_db_name}.project p on p.id=rp.project -group by r.id; - -create table ${stats_db_name}.project_resultcount as -with rcount as ( - select p.id as pid, count(distinct r.id) as `count`, r.type as type - from ${stats_db_name}.project p - left outer join ${stats_db_name}.result_projects rp on rp.project=p.id - left outer join ${stats_db_name}.result r on r.id=rp.id - group by r.type, p.id ) -select rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications, - sum(case when rcount.type='dataset' then rcount.count else 0 end) as datasets, - sum(case when rcount.type='software' then rcount.count else 0 end) as software, - sum(case when rcount.type='other' then rcount.count else 0 end) as other -from rcount -group by rcount.pid; - -create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture \ No newline at end of file diff --git 
a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql deleted file mode 100644 index 5c102d014..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step17.sql +++ /dev/null @@ -1,207 +0,0 @@ ------------------------------------------------------- ------------------------------------------------------- --- Shadow schema table exchange ------------------------------------------------------- ------------------------------------------------------- - --- Dropping old views -DROP VIEW IF EXISTS ${stats_db_shadow_name}.category; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.concept; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.context; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.country; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.countrygdp; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.creation_date; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_citations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_classifications; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_concepts; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_datasources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_languages; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_licenses; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_oids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_pids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_refereed; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.dataset_topics; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_languages; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_oids; -DROP VIEW IF EXISTS 
${stats_db_shadow_name}.datasource_organizations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_results; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.datasource_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.funder; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.fundref; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.numbers_country; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_datasources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_pids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_projects; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.organization_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.project; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_oids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_organizations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_resultcount; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.project_results_publication; -DROP VIEW IF EXISTS 
${stats_db_shadow_name}.publication; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_citations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_classifications; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_concepts; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_datasources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_languages; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_licenses; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_oids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_pids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_refereed; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.publication_topics; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_affiliated_country; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_citations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_classifications; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_concepts; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_datasources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_deposited_country; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_fundercount; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_gold; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_greenoa; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_languages; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_licenses; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_oids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_organization; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_peerreviewed; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_pids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projectcount; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_projects; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_refereed; -DROP VIEW IF EXISTS 
${stats_db_shadow_name}.result_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.result_topics; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.rndexpediture; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.roarmap; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_citations; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_classifications; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_concepts; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_datasources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_languages; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_licenses; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_oids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_pids; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_refereed; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_sources; -DROP VIEW IF EXISTS ${stats_db_shadow_name}.software_topics; - - --- Creating the shadow database, in case it doesn't exist -CREATE database IF NOT EXISTS ${stats_db_shadow_name}; - --- Creating new views -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.category AS SELECT * FROM ${stats_db_name}.category; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.concept AS SELECT * FROM ${stats_db_name}.concept; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.context AS SELECT * FROM ${stats_db_name}.context; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.country AS SELECT * FROM ${stats_db_name}.country; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations; -CREATE VIEW IF NOT 
EXISTS ${stats_db_shadow_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_concepts AS SELECT * FROM ${stats_db_name}.dataset_concepts; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_datasources AS SELECT * FROM ${stats_db_name}.dataset_datasources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_languages AS SELECT * FROM ${stats_db_name}.dataset_languages; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_languages AS SELECT * FROM ${stats_db_name}.datasource_languages; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.funder AS SELECT * FROM 
${stats_db_name}.funder; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization AS SELECT * FROM ${stats_db_name}.organization; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_concepts AS SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_datasources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_languages AS SELECT * FROM ${stats_db_name}.otherresearchproduct_languages; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_oids AS SELECT * FROM 
${stats_db_name}.otherresearchproduct_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project AS SELECT * FROM ${stats_db_name}.project; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication AS SELECT * FROM ${stats_db_name}.publication; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources; -CREATE 
VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_languages AS SELECT * FROM ${stats_db_name}.publication_languages; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result AS SELECT * FROM ${stats_db_name}.result; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold; -CREATE VIEW IF NOT EXISTS 
${stats_db_shadow_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.roarmap AS SELECT * FROM ${stats_db_name}.roarmap; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software AS SELECT * FROM ${stats_db_name}.software; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_citations AS SELECT * FROM ${stats_db_name}.software_citations; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_classifications AS SELECT * FROM ${stats_db_name}.software_classifications; -CREATE VIEW IF NOT EXISTS 
${stats_db_shadow_name}.software_concepts AS SELECT * FROM ${stats_db_name}.software_concepts; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_datasources AS SELECT * FROM ${stats_db_name}.software_datasources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_languages AS SELECT * FROM ${stats_db_name}.software_languages; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources; -CREATE VIEW IF NOT EXISTS ${stats_db_shadow_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql deleted file mode 100644 index 34e48a18a..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step19.sql +++ /dev/null @@ -1,8 +0,0 @@ ------------------------------------------------------- ------------------------------------------------------- --- Impala table statistics - Needed to make the tables --- visible for impala ------------------------------------------------------- ------------------------------------------------------- - -INVALIDATE METADATA ${stats_db_name}; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql deleted file mode 100644 index ba0db25be..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ /dev/null @@ -1,44 +0,0 @@ --------------------------------------------------------------- --------------------------------------------------------------- --- Publication table/view and Publication related tables/views --------------------------------------------------------------- --------------------------------------------------------------- - --- Publication temporary table -DROP TABLE IF EXISTS ${stats_db_name}.publication_tmp; - -CREATE TABLE ${stats_db_name}.publication_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) into 100 buckets stored as orc tblproperties('transactional'='true'); - -INSERT INTO ${stats_db_name}.publication_tmp SELECT substr(p.id, 4) as id, p.title[0].value as title, p.publisher.value as publisher, p.journal.name as journal , -p.dateofacceptance.value as date, date_format(p.dateofacceptance.value,'yyyy') as year, p.bestaccessright.classname as bestlicence, -p.embargoenddate.value as embargo_end_date, false as delayed, size(p.author) as authors , concat_ws('\u003B',p.source.value) as source, -case when size(p.description) > 0 then true else false end as abstract, -'publication' as type -from ${openaire_db_name}.publication p -where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.publication_classifications AS SELECT substr(p.id, 4) as id, instancetype.classname as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype where p.datainfo.deletedbyinference=false; - -CREATE TABLE 
${stats_db_name}.publication_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.publication_datasources as -SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource - FROM ( - SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource - from ${openaire_db_name}.publication p lateral view explode(p.instance) instances as instance - where p.datainfo.deletedbyinference=false ) p - LEFT OUTER JOIN ( - SELECT substr(d.id, 4) id - from ${openaire_db_name}.datasource d - WHERE d.datainfo.deletedbyinference=false ) d on p.datasource = d.id; - -CREATE TABLE ${stats_db_name}.publication_languages AS select substr(p.id, 4) as id, p.language.classname as language FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.publication_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.publication_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.publication_topics as select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; - --- Publication_citations -CREATE TABLE ${stats_db_name}.publication_citations AS SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM 
${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and p.datainfo.deletedbyinference=false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql deleted file mode 100644 index f69715a31..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ /dev/null @@ -1,36 +0,0 @@ ------------------------------------------------------- ------------------------------------------------------- --- Dataset table/view and Dataset related tables/views ------------------------------------------------------- ------------------------------------------------------- - --- Dataset temporary table supporting updates -DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp; -CREATE TABLE ${stats_db_name}.dataset_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) into 100 buckets stored AS orc tblproperties('transactional'='true'); - -INSERT INTO ${stats_db_name}.dataset_tmp SELECT substr(d.id, 4) AS id, d.title[0].value AS title, d.publisher.value AS publisher, cast(null AS string) AS journal, -d.dateofacceptance.value as date, date_format(d.dateofacceptance.value,'yyyy') AS year, d.bestaccessright.classname AS bestlicence, -d.embargoenddate.value AS embargo_end_date, false AS delayed, size(d.author) AS authors , concat_ws('\u003B',d.source.value) AS source, - CASE WHEN SIZE(d.description) > 0 THEN TRUE ELSE FALSE end AS abstract, -'dataset' AS type -FROM ${openaire_db_name}.dataset d -WHERE 
d.datainfo.deletedbyinference=FALSE; - --- Dataset_citations -CREATE TABLE ${stats_db_name}.dataset_citations AS SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and d.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.dataset_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.dataset_concepts AS SELECT substr(p.id, 4) as id, contexts.context.id as concept from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.dataset_datasources AS SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource -FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN -(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id; - -CREATE TABLE ${stats_db_name}.dataset_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.dataset p where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.dataset_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.dataset_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS 
type, ppid.value AS pid FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql deleted file mode 100644 index 2c4a625e1..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ /dev/null @@ -1,36 +0,0 @@ --------------------------------------------------------- --------------------------------------------------------- --- Software table/view and Software related tables/views --------------------------------------------------------- --------------------------------------------------------- - --- Software temporary table supporting updates -DROP TABLE IF EXISTS ${stats_db_name}.software_tmp; -CREATE TABLE ${stats_db_name}.software_tmp (id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) clustered by (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); - -INSERT INTO ${stats_db_name}.software_tmp SELECT substr(s.id, 4) as id, s.title[0].value AS title, s.publisher.value AS publisher, CAST(NULL AS string) AS journal, -s.dateofacceptance.value AS DATE, date_format(s.dateofacceptance.value,'yyyy') AS YEAR, s.bestaccessright.classname AS bestlicence, -s.embargoenddate.value AS embargo_end_date, FALSE AS delayed, SIZE(s.author) AS authors , 
concat_ws('\u003B',s.source.value) AS source, - CASE WHEN SIZE(s.description) > 0 THEN TRUE ELSE FALSE END AS abstract, -'software' as type -from ${openaire_db_name}.software s -where s.datainfo.deletedbyinference=false; - --- Software_citations -CREATE TABLE ${stats_db_name}.software_citations AS SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and s.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.software_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.software_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.software_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource -FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance) instances AS instance where p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN -(SELECT substr(d.id, 4) id FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d ON p.datasource = d.id; - -CREATE TABLE ${stats_db_name}.software_languages AS select substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.software p where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.software_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.software p 
LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.software_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql deleted file mode 100644 index 1fa5df8cb..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ /dev/null @@ -1,36 +0,0 @@ --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- --- Otherresearchproduct table/view and Otherresearchproduct related tables/views --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- - --- Otherresearchproduct temporary table supporting updates -DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp; -CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp ( id STRING, title STRING, publisher STRING, journal STRING, date STRING, year STRING, bestlicence STRING, embargo_end_date STRING, delayed BOOLEAN, authors INT, source STRING, abstract BOOLEAN, type STRING ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); - -INSERT INTO 
${stats_db_name}.otherresearchproduct_tmp SELECT substr(o.id, 4) AS id, o.title[0].value AS title, o.publisher.value AS publisher, CAST(NULL AS string) AS journal, -o.dateofacceptance.value AS DATE, date_format(o.dateofacceptance.value,'yyyy') AS year, o.bestaccessright.classname AS bestlicence, -o.embargoenddate.value as embargo_end_date, FALSE AS delayed, SIZE(o.author) AS authors , concat_ws('\u003B',o.source.value) AS source, -CASE WHEN SIZE(o.description) > 0 THEN TRUE ELSE FALSE END AS abstract, -'other' AS type -FROM ${openaire_db_name}.otherresearchproduct o -WHERE o.datainfo.deletedbyinference=FALSE; - --- Otherresearchproduct_citations -CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") !="" and o.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS SELECT substr(p.id, 4) AS id, instancetype.classname AS type FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS SELECT substr(p.id, 4) AS id, contexts.context.id AS concept FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource -from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance where 
p.datainfo.deletedbyinference=false) p LEFT OUTER JOIN -(SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS SELECT substr(p.id, 4) AS id, p.language.classname AS language FROM ${openaire_db_name}.otherresearchproduct p where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference=false; - -CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference=false; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql deleted file mode 100644 index 21a944164..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ /dev/null @@ -1,30 +0,0 @@ --- noinspection SqlNoDataSourceInspectionForFile - ------------------------------------------------------- ------------------------------------------------------- --- Project table/view and Project related tables/views ------------------------------------------------------- 
------------------------------------------------------- --- Project_oids Table -DROP TABLE IF EXISTS ${stats_db_name}.project_oids; -CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; - --- Project_organizations Table -DROP TABLE IF EXISTS ${stats_db_name}.project_organizations; -CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization'; - --- Project_results Table -DROP TABLE IF EXISTS ${stats_db_name}.project_results; -CREATE TABLE ${stats_db_name}.project_results AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result FROM ${openaire_db_name}.relation r WHERE r.reltype='resultProject' and r.datainfo.deletedbyinference=false; - --- Project table ----------------- --- Creating and populating temporary Project table -DROP TABLE IF EXISTS ${stats_db_name}.project_tmp; -CREATE TABLE ${stats_db_name}.project_tmp (id STRING, acronym STRING, title STRING, funder STRING, funding_lvl0 STRING, funding_lvl1 STRING, funding_lvl2 STRING, ec39 STRING, type STRING, startdate STRING, enddate STRING, start_year INT, end_year INT, duration INT, haspubs STRING, numpubs INT, daysforlastpub INT, delayedpubs INT, callidentifier STRING, code STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); - -INSERT INTO ${stats_db_name}.project_tmp SELECT substr(p.id, 4) AS id, p.acronym.value AS acronym, p.title.value AS title, xpath_string(p.fundingtree[0].value, '//funder/name') AS funder, xpath_string(p.fundingtree[0].value, '//funding_level_0/name') AS funding_lvl0, xpath_string(p.fundingtree[0].value, '//funding_level_1/name') AS funding_lvl1, xpath_string(p.fundingtree[0].value, '//funding_level_2/name') AS funding_lvl2, p.ecsc39.value AS ec39, p.contracttype.classname AS type, 
p.startdate.value AS startdate, p.enddate.value AS enddate, year(p.startdate.value) AS start_year, year(p.enddate.value) AS end_year, CAST(MONTHS_BETWEEN(p.enddate.value, p.startdate.value) AS INT) AS duration, 'no' AS haspubs, 0 AS numpubs, 0 AS daysforlastpub, 0 AS delayedpubs, p.callidentifier.value AS callidentifier, p.code.value AS code FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference=false; - -create table ${stats_db_name}.funder as -select distinct xpath_string(fund, '//funder/id') as id, xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/shortname') as shortname -from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql deleted file mode 100644 index 7acabf1dd..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ /dev/null @@ -1,31 +0,0 @@ ----------------------------------------------------- ----------------------------------------------------- --- Result table/view and Result related tables/views ----------------------------------------------------- ----------------------------------------------------- - --- Views on temporary tables that should be re-created in the end -CREATE OR REPLACE VIEW ${stats_db_name}.result as SELECT *, bestlicence AS access_mode FROM ${stats_db_name}.publication_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.software_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.dataset_tmp UNION ALL SELECT *,bestlicence AS access_mode FROM ${stats_db_name}.otherresearchproduct_tmp; - --- Views on final tables -CREATE OR REPLACE VIEW ${stats_db_name}.result_datasources AS SELECT * FROM 
${stats_db_name}.publication_datasources UNION ALL SELECT * FROM ${stats_db_name}.software_datasources UNION ALL SELECT * FROM ${stats_db_name}.dataset_datasources UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_citations AS SELECT * FROM ${stats_db_name}.publication_citations UNION ALL SELECT * FROM ${stats_db_name}.software_citations UNION ALL SELECT * FROM ${stats_db_name}.dataset_citations UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_citations; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications UNION ALL SELECT * FROM ${stats_db_name}.software_classifications UNION ALL SELECT * FROM ${stats_db_name}.dataset_classifications UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts UNION ALL SELECT * FROM ${stats_db_name}.software_concepts UNION ALL SELECT * FROM ${stats_db_name}.dataset_concepts UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_languages AS SELECT * FROM ${stats_db_name}.publication_languages UNION ALL SELECT * FROM ${stats_db_name}.software_languages UNION ALL SELECT * FROM ${stats_db_name}.dataset_languages UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_languages; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_oids AS SELECT * FROM ${stats_db_name}.publication_oids UNION ALL SELECT * FROM ${stats_db_name}.software_oids UNION ALL SELECT * FROM ${stats_db_name}.dataset_oids UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_oids; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS SELECT * FROM ${stats_db_name}.publication_pids UNION ALL SELECT * FROM ${stats_db_name}.software_pids UNION ALL SELECT * FROM ${stats_db_name}.dataset_pids UNION 
ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_pids; - -CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS SELECT * FROM ${stats_db_name}.publication_topics UNION ALL SELECT * FROM ${stats_db_name}.software_topics UNION ALL SELECT * FROM ${stats_db_name}.dataset_topics UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; - -DROP TABLE IF EXISTS ${stats_db_name}.result_organization; -CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization'; - -DROP TABLE IF EXISTS ${stats_db_name}.result_projects; -CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id=pr.result JOIN ${stats_db_name}.project_tmp p ON p.id=pr.id; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql deleted file mode 100644 index 4e13b3dd8..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ /dev/null @@ -1,58 +0,0 @@ --- noinspection SqlNoDataSourceInspectionForFile - ------------------------------------------------------------- ------------------------------------------------------------- --- Datasource table/view and Datasource related tables/views ------------------------------------------------------------- ------------------------------------------------------------- - --- Datasource table creation & update -------------------------------------- --- Creating and populating temporary datasource table -DROP TABLE IF EXISTS ${stats_db_name}.datasource_tmp; -CREATE TABLE ${stats_db_name}.datasource_tmp(`id` string, 
`name` STRING, `type` STRING, `dateofvalidation` STRING, `yearofvalidation` string, `harvested` BOOLEAN, `piwik_id` INT, `latitude` STRING, `longitude`STRING, `websiteurl` STRING, `compatibility` STRING) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties('transactional'='true'); - --- Insert statement that takes into account the piwik_id of the openAIRE graph -INSERT INTO ${stats_db_name}.datasource_tmp -SELECT substr(d1.id, 4) AS id, officialname.value AS name, -datasourcetype.classname AS type, dateofvalidation.value AS dateofvalidation, date_format(d1.dateofvalidation.value,'yyyy') AS yearofvalidation, -FALSE AS harvested, -CASE WHEN d2.piwik_id IS NULL THEN 0 ELSE d2.piwik_id END AS piwik_id, -d1.latitude.value AS latitude, d1.longitude.value AS longitude, -d1.websiteurl.value AS websiteurl, d1.openairecompatibility.classid AS compatibility -FROM ${openaire_db_name}.datasource d1 -LEFT OUTER JOIN -(SELECT id, split(originalidd, '\\:')[1] as piwik_id -FROM ${openaire_db_name}.datasource -LATERAL VIEW EXPLODE(originalid) temp AS originalidd -WHERE originalidd like "piwik:%") AS d2 -ON d1.id = d2.id -WHERE d1.datainfo.deletedbyinference=FALSE; - --- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. 
--- Creating a temporary dual table that will be removed after the following insert -CREATE TABLE ${stats_db_name}.dual(dummy CHAR(1)); -INSERT INTO ${stats_db_name}.dual VALUES('X'); -INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`, `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`) -SELECT 'other', 'Other', 'Repository', NULL, NULL, false, 0, NULL, NULL, NULL, 'unknown' FROM ${stats_db_name}.dual WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name='Unknown Repository'); -DROP TABLE ${stats_db_name}.dual; - -UPDATE ${stats_db_name}.datasource_tmp SET name='Other' WHERE name='Unknown Repository'; -UPDATE ${stats_db_name}.datasource_tmp SET yearofvalidation=null WHERE yearofvalidation='-1'; - -DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages; -CREATE TABLE ${stats_db_name}.datasource_languages AS SELECT substr(d.id, 4) AS id, langs.languages AS language FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; - -DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids; -CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; - -DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations; -CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization'; - --- datasource sources: --- where the datasource info have been collected from. 
-create table if not exists ${stats_db_name}.datasource_sources AS select substr(d.id,4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf where d.datainfo.deletedbyinference=false; - -CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result FROM ${stats_db_name}.result_datasources; - - diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql deleted file mode 100644 index a918e4de4..000000000 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ /dev/null @@ -1,12 +0,0 @@ ----------------------------------------------------------------- ----------------------------------------------------------------- --- Organization table/view and Organization related tables/views ----------------------------------------------------------------- ----------------------------------------------------------------- -DROP TABLE IF EXISTS ${stats_db_name}.organization; -CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization AS SELECT substr(o.id, 4) as id, o.legalname.value as name, o.legalshortname.value as legalshortname, o.country.classid as country -FROM ${openaire_db_name}.organization o WHERE o.datainfo.deletedbyinference=FALSE; - -CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource FROM ${stats_db_name}.datasource_organizations; - -CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS SELECT id AS project, organization as id FROM ${stats_db_name}.project_organizations; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/updateProductionViews.sql 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/updateProductionViews.sql new file mode 100644 index 000000000..48f8d58fd --- /dev/null +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/updateProductionViews.sql @@ -0,0 +1,207 @@ +------------------------------------------------------ +------------------------------------------------------ +-- Shadow schema table exchange +------------------------------------------------------ +------------------------------------------------------ + +-- Dropping old views +DROP VIEW IF EXISTS ${stats_db_production_name}.category; +DROP VIEW IF EXISTS ${stats_db_production_name}.concept; +DROP VIEW IF EXISTS ${stats_db_production_name}.context; +DROP VIEW IF EXISTS ${stats_db_production_name}.country; +DROP VIEW IF EXISTS ${stats_db_production_name}.countrygdp; +DROP VIEW IF EXISTS ${stats_db_production_name}.creation_date; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_citations; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_classifications; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_concepts; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_datasources; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_languages; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_licenses; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_oids; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_pids; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_refereed; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.dataset_topics; +DROP VIEW IF EXISTS ${stats_db_production_name}.datasource; +DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_languages; +DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_oids; +DROP VIEW IF EXISTS 
${stats_db_production_name}.datasource_organizations; +DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_results; +DROP VIEW IF EXISTS ${stats_db_production_name}.datasource_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.funder; +DROP VIEW IF EXISTS ${stats_db_production_name}.fundref; +DROP VIEW IF EXISTS ${stats_db_production_name}.numbers_country; +DROP VIEW IF EXISTS ${stats_db_production_name}.organization; +DROP VIEW IF EXISTS ${stats_db_production_name}.organization_datasources; +DROP VIEW IF EXISTS ${stats_db_production_name}.organization_pids; +DROP VIEW IF EXISTS ${stats_db_production_name}.organization_projects; +DROP VIEW IF EXISTS ${stats_db_production_name}.organization_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_citations; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_classifications; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_concepts; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_datasources; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_languages; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_licenses; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_oids; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_pids; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_refereed; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.otherresearchproduct_topics; +DROP VIEW IF EXISTS ${stats_db_production_name}.project; +DROP VIEW IF EXISTS ${stats_db_production_name}.project_oids; +DROP VIEW IF EXISTS ${stats_db_production_name}.project_organizations; +DROP VIEW IF EXISTS ${stats_db_production_name}.project_results; +DROP VIEW IF EXISTS 
${stats_db_production_name}.project_resultcount; +DROP VIEW IF EXISTS ${stats_db_production_name}.project_results_publication; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_citations; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_classifications; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_concepts; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_datasources; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_languages; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_licenses; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_oids; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_pids; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_refereed; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.publication_topics; +DROP VIEW IF EXISTS ${stats_db_production_name}.result; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_affiliated_country; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_citations; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_classifications; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_concepts; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_datasources; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_deposited_country; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_fundercount; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_gold; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_greenoa; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_languages; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_licenses; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_oids; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_organization; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_peerreviewed; +DROP VIEW IF 
EXISTS ${stats_db_production_name}.result_pids; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_projectcount; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_projects; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_refereed; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.result_topics; +DROP VIEW IF EXISTS ${stats_db_production_name}.rndexpediture; +DROP VIEW IF EXISTS ${stats_db_production_name}.roarmap; +DROP VIEW IF EXISTS ${stats_db_production_name}.software; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_citations; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_classifications; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_concepts; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_datasources; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_languages; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_licenses; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_oids; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_pids; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_refereed; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_sources; +DROP VIEW IF EXISTS ${stats_db_production_name}.software_topics; + + +-- Creating the shadow database, in case it doesn't exist +CREATE database IF NOT EXISTS ${stats_db_production_name}; + +-- Creating new views +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.category AS SELECT * FROM ${stats_db_name}.category; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.concept AS SELECT * FROM ${stats_db_name}.concept; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.context AS SELECT * FROM ${stats_db_name}.context; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.country AS SELECT * FROM ${stats_db_name}.country; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.countrygdp AS SELECT * FROM ${stats_db_name}.countrygdp; 
+CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.creation_date AS SELECT * FROM ${stats_db_name}.creation_date; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset AS SELECT * FROM ${stats_db_name}.dataset; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_citations AS SELECT * FROM ${stats_db_name}.dataset_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_classifications AS SELECT * FROM ${stats_db_name}.dataset_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_concepts AS SELECT * FROM ${stats_db_name}.dataset_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_datasources AS SELECT * FROM ${stats_db_name}.dataset_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_languages AS SELECT * FROM ${stats_db_name}.dataset_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_licenses AS SELECT * FROM ${stats_db_name}.dataset_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_oids AS SELECT * FROM ${stats_db_name}.dataset_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_pids AS SELECT * FROM ${stats_db_name}.dataset_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_refereed AS SELECT * FROM ${stats_db_name}.dataset_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_sources AS SELECT * FROM ${stats_db_name}.dataset_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.dataset_topics AS SELECT * FROM ${stats_db_name}.dataset_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource AS SELECT * FROM ${stats_db_name}.datasource; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_languages AS SELECT * FROM ${stats_db_name}.datasource_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_oids AS SELECT * FROM ${stats_db_name}.datasource_oids; +CREATE VIEW IF NOT EXISTS 
${stats_db_production_name}.datasource_organizations AS SELECT * FROM ${stats_db_name}.datasource_organizations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_results AS SELECT * FROM ${stats_db_name}.datasource_results; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.datasource_sources AS SELECT * FROM ${stats_db_name}.datasource_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.funder AS SELECT * FROM ${stats_db_name}.funder; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.fundref AS SELECT * FROM ${stats_db_name}.fundref; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.numbers_country AS SELECT * FROM ${stats_db_name}.numbers_country; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization AS SELECT * FROM ${stats_db_name}.organization; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_datasources AS SELECT * FROM ${stats_db_name}.organization_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_pids AS SELECT * FROM ${stats_db_name}.organization_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_projects AS SELECT * FROM ${stats_db_name}.organization_projects; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.organization_sources AS SELECT * FROM ${stats_db_name}.organization_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct AS SELECT * FROM ${stats_db_name}.otherresearchproduct; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_citations AS SELECT * FROM ${stats_db_name}.otherresearchproduct_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_classifications AS SELECT * FROM ${stats_db_name}.otherresearchproduct_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_concepts AS SELECT * FROM ${stats_db_name}.otherresearchproduct_concepts; +CREATE VIEW IF NOT EXISTS 
${stats_db_production_name}.otherresearchproduct_datasources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_languages AS SELECT * FROM ${stats_db_name}.otherresearchproduct_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_licenses AS SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_oids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_pids AS SELECT * FROM ${stats_db_name}.otherresearchproduct_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_refereed AS SELECT * FROM ${stats_db_name}.otherresearchproduct_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_sources AS SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.otherresearchproduct_topics AS SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project AS SELECT * FROM ${stats_db_name}.project; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_oids AS SELECT * FROM ${stats_db_name}.project_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_organizations AS SELECT * FROM ${stats_db_name}.project_organizations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_results AS SELECT * FROM ${stats_db_name}.project_results; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_resultcount AS SELECT * FROM ${stats_db_name}.project_resultcount; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.project_results_publication AS SELECT * FROM ${stats_db_name}.project_results_publication; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication AS SELECT * FROM 
${stats_db_name}.publication; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_citations AS SELECT * FROM ${stats_db_name}.publication_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_classifications AS SELECT * FROM ${stats_db_name}.publication_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_concepts AS SELECT * FROM ${stats_db_name}.publication_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_datasources AS SELECT * FROM ${stats_db_name}.publication_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_languages AS SELECT * FROM ${stats_db_name}.publication_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_oids AS SELECT * FROM ${stats_db_name}.publication_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_pids AS SELECT * FROM ${stats_db_name}.publication_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_refereed AS SELECT * FROM ${stats_db_name}.publication_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_sources AS SELECT * FROM ${stats_db_name}.publication_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.publication_topics AS SELECT * FROM ${stats_db_name}.publication_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result AS SELECT * FROM ${stats_db_name}.result; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_affiliated_country AS SELECT * FROM ${stats_db_name}.result_affiliated_country; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_citations AS SELECT * FROM ${stats_db_name}.result_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_classifications AS SELECT * FROM ${stats_db_name}.result_classifications; +CREATE 
VIEW IF NOT EXISTS ${stats_db_production_name}.result_concepts AS SELECT * FROM ${stats_db_name}.result_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_datasources AS SELECT * FROM ${stats_db_name}.result_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_deposited_country AS SELECT * FROM ${stats_db_name}.result_deposited_country; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_fundercount AS SELECT * FROM ${stats_db_name}.result_fundercount; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_gold AS SELECT * FROM ${stats_db_name}.result_gold; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_greenoa AS SELECT * FROM ${stats_db_name}.result_greenoa; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_languages AS SELECT * FROM ${stats_db_name}.result_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_licenses AS SELECT * FROM ${stats_db_name}.result_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_oids AS SELECT * FROM ${stats_db_name}.result_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_organization AS SELECT * FROM ${stats_db_name}.result_organization; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_peerreviewed AS SELECT * FROM ${stats_db_name}.result_peerreviewed; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_pids AS SELECT * FROM ${stats_db_name}.result_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_projectcount AS SELECT * FROM ${stats_db_name}.result_projectcount; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_projects AS SELECT * FROM ${stats_db_name}.result_projects; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_refereed AS SELECT * FROM ${stats_db_name}.result_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.result_sources AS SELECT * FROM ${stats_db_name}.result_sources; +CREATE VIEW IF NOT EXISTS 
${stats_db_production_name}.result_topics AS SELECT * FROM ${stats_db_name}.result_topics; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.rndexpediture AS SELECT * FROM ${stats_db_name}.rndexpediture; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.roarmap AS SELECT * FROM ${stats_db_name}.roarmap; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software AS SELECT * FROM ${stats_db_name}.software; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_citations AS SELECT * FROM ${stats_db_name}.software_citations; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_classifications AS SELECT * FROM ${stats_db_name}.software_classifications; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_concepts AS SELECT * FROM ${stats_db_name}.software_concepts; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_datasources AS SELECT * FROM ${stats_db_name}.software_datasources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_languages AS SELECT * FROM ${stats_db_name}.software_languages; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_licenses AS SELECT * FROM ${stats_db_name}.software_licenses; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_oids AS SELECT * FROM ${stats_db_name}.software_oids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_pids AS SELECT * FROM ${stats_db_name}.software_pids; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_refereed AS SELECT * FROM ${stats_db_name}.software_refereed; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_sources AS SELECT * FROM ${stats_db_name}.software_sources; +CREATE VIEW IF NOT EXISTS ${stats_db_production_name}.software_topics AS SELECT * FROM ${stats_db_name}.software_topics; diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml 
b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 324e6f9a1..ae2318238 100644 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -4,10 +4,6 @@ stats_db_name the target stats database name - - stats_db_shadow_name - the name of the shadow schema - stats_db_production_name the name of the production schema @@ -41,262 +37,47 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + ${hive_jdbc_url} - + stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_production_name=${stats_db_production_name} - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - external_stats_db_name=${external_stats_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - external_stats_db_name=${external_stats_db_name} - - - - - - - - ${hive_jdbc_url} - - 
stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - + - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - stats_db_shadow_name=${stats_db_shadow_name} - - - - - - + ${jobTracker} ${nameNode} impala-shell.sh - ${stats_db_name} - step18.sql - ${wf:appPath()}/scripts/step18.sql + ${stats_db_production_name} + computeProductionStats.sql + ${wf:appPath()}/scripts/computeProductionStats.sql impala-shell.sh - - - - - - - ${jobTracker} - ${nameNode} - impala-shell.sh - ${stats_db_shadow_name} - step19.sql - ${wf:appPath()}/scripts/step19.sql - impala-shell.sh - - + - + ${jobTracker} ${nameNode} - updateCache.sh + promoteCache.sh ${stats_tool_api_url} - updateCache.sh + promoteCache.sh - -
+ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 0b6a00df1..451461669 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -255,7 +255,7 @@ ${hive_jdbc_url} - + stats_db_name=${stats_db_name} stats_db_shadow_name=${stats_db_shadow_name} @@ -283,8 +283,8 @@ ${nameNode} impala-shell.sh ${stats_db_shadow_name} - step19.sql - ${wf:appPath()}/scripts/step19.sql + computeProductionStats.sql + ${wf:appPath()}/scripts/computeProductionStats.sql impala-shell.sh From 4c58bd1c93297bfc0e99b6e82268fa86f3d83067 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 3 Dec 2020 11:24:00 +0100 Subject: [PATCH 089/108] merge with upstream --- .../dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 1bfb79dca..238375197 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; - import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; From ea88dc3401e656682c8c13343684fa05a5e5e692 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 3 Dec 2020 11:24:23 +0100 Subject: [PATCH 090/108] 
fixed issue in property name --- .../dhp/oa/graph/dump/community/oozie_app/workflow.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml index 161fd2dec..fcef2547a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/community/oozie_app/workflow.xml @@ -26,8 +26,8 @@ the metadata associated to the deposition - newDeposition - true if it is a brand new depositon. false for new version of an old deposition + depositionType + one among {new, update, version} conceptRecordId @@ -419,6 +419,7 @@ --metadata${metadata} --communityMapPath${workingDir}/communityMap --conceptRecordId${conceptRecordId} + --depositionId${depositionId} --depositionType${depositionType} From d23ccae0d5c0ad735b9eaad594ffc07199922680 Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 4 Dec 2020 12:42:17 +0200 Subject: [PATCH 091/108] ignoring deletedbyinference relations --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql | 2 +- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql | 2 +- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 21a944164..b4745535d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -11,7 +11,7 @@ CREATE TABLE ${stats_db_name}.project_oids AS SELECT substr(p.id, 4) AS id, oids -- Project_organizations Table DROP TABLE IF EXISTS ${stats_db_name}.project_organizations; -CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization'; +CREATE TABLE ${stats_db_name}.project_organizations AS SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization from ${openaire_db_name}.relation r WHERE r.reltype='projectOrganization' and r.datainfo.deletedbyinference=false; -- Project_results Table DROP TABLE IF EXISTS ${stats_db_name}.project_results; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index 7acabf1dd..36a4a8a49 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -25,7 +25,7 @@ CREATE OR REPLACE VIEW ${stats_db_name}.result_pids AS SELECT * FROM ${stats_db_ CREATE OR REPLACE VIEW ${stats_db_name}.result_topics AS SELECT * FROM ${stats_db_name}.publication_topics UNION ALL SELECT * FROM ${stats_db_name}.software_topics UNION ALL SELECT * FROM ${stats_db_name}.dataset_topics UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; DROP TABLE IF EXISTS ${stats_db_name}.result_organization; -CREATE TABLE ${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization'; +CREATE TABLE 
${stats_db_name}.result_organization AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='resultOrganization' and r.datainfo.deletedbyinference=false; DROP TABLE IF EXISTS ${stats_db_name}.result_projects; CREATE TABLE ${stats_db_name}.result_projects AS select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id=pr.result JOIN ${stats_db_name}.project_tmp p ON p.id=pr.id; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 4e13b3dd8..197047c8b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -47,7 +47,7 @@ DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids; CREATE TABLE ${stats_db_name}.datasource_oids AS SELECT substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations; -CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization'; +CREATE TABLE ${stats_db_name}.datasource_organizations AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype='datasourceOrganization' and r.datainfo.deletedbyinference=false; -- datasource sources: -- where the datasource info have been collected from. 
From 7cb113e088375f7b66e743ee48044f914bebbaee Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 4 Dec 2020 13:04:25 +0200 Subject: [PATCH 092/108] added the new parameter (stats_tool_api_url) in the workflow parameters --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 451461669..dcd034166 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -17,6 +17,10 @@ stats_db_shadow_name the name of the shadow schema + + stats_tool_api_url + The url of the API of the stats tool. Is used to trigger the cache update. + hive_metastore_uris hive server metastore URIs From b1ed1afdcc90cfa856d71b6efcf0ec910cc86cf0 Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 4 Dec 2020 13:07:18 +0200 Subject: [PATCH 093/108] added the new parameter (stats_tool_api_url) in the workflow parameters --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index ae2318238..d744f18da 100644 --- a/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-promote/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -8,6 +8,10 @@ stats_db_production_name the name of the production schema + + stats_tool_api_url + The url of the API of the stats tool. Is used to trigger the cache promote. 
+ hive_metastore_uris hive server metastore URIs From a104a632dfaeda2b95c41a1baf0fc63d7f8cceec Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 4 Dec 2020 16:32:47 +0100 Subject: [PATCH 094/108] cleanup --- .../eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 2a6fd3a1d..fbb20f143 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -190,15 +190,6 @@ public class CleaningFunctions { } } - final Set collectedFrom = Optional - .ofNullable(r.getCollectedfrom()) - .map( - c -> c - .stream() - .map(KeyValue::getKey) - .collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet<>()); - for (Author a : r.getAuthor()) { if (Objects.isNull(a.getPid())) { a.setPid(Lists.newArrayList()); From b31dd126fbb40a793cfe7284fa25fc160765f403 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 7 Dec 2020 10:42:38 +0100 Subject: [PATCH 095/108] fixed crossref workflow added common ORCID Class --- dhp-schemas/pom.xml | 2 +- .../eu/dnetlib/dhp/schema/orcid/OrcidDOI.java | 24 ++++ .../doiboost/crossref/Crossref2Oaf.scala | 4 +- .../doiboost/crossref/CrossrefDataset.scala | 38 ++++--- .../crossref/SparkMapDumpIntoOAF.scala | 70 +----------- .../doiboost/crossref/oozie_app/workflow.xml | 106 +++++++++--------- .../doiboost/crossref_to_dataset_params.json | 3 +- dhp-workflows/pom.xml | 2 +- 8 files changed, 106 insertions(+), 143 deletions(-) create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index b04d62dd2..73efeabb4 100644 --- a/dhp-schemas/pom.xml +++ 
b/dhp-schemas/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp 1.2.4-SNAPSHOT - ../ + ../pom.xml dhp-schemas diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java new file mode 100644 index 000000000..11bce26c8 --- /dev/null +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.schema.orcid; + +import java.util.List; + +public class OrcidDOI { + private String doi; + private List authors; + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 1adb7465e..5ba01357e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -200,7 +200,7 @@ case object Crossref2Oaf { a.setSurname(family) a.setFullname(s"$given $family") if (StringUtils.isNotBlank(orcid)) - a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava) + a.setPid(List(createSP(orcid, ORCID, PID_TYPES, generateDataInfo())).asJava) a } @@ -248,7 +248,7 @@ case object Crossref2Oaf { def snsfRule(award:String): String = { - var tmp1 = StringUtils.substringAfter(award,"_") + val tmp1 = StringUtils.substringAfter(award,"_") val tmp2 = StringUtils.substringBefore(tmp1,"/") logger.debug(s"From $award to $tmp2") tmp2 diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 996ba5585..4a39a2987 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref import eu.dnetlib.dhp.application.ArgumentApplicationParser import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.{IntWritable, Text} import org.apache.spark.SparkConf import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} @@ -12,21 +13,23 @@ import org.slf4j.{Logger, LoggerFactory} object CrossrefDataset { + val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) - def extractTimestamp(input:String): Long = { + + def to_item(input:String):CrossrefDT = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) - - (json\"indexed"\"timestamp").extractOrElse[Long](0) + val ts:Long = (json \ "indexed" \ "timestamp").extract[Long] + val doi:String = (json \ "DOI").extract[String] + CrossrefDT(doi, input, ts) } - def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) + val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(CrossrefDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json"))) parser.parseArgument(args) @@ -49,9 +52,8 @@ object CrossrefDataset { if (a == null) return b - val tb = extractTimestamp(b.json) - val ta = extractTimestamp(a.json) - if(ta >tb) { + + if(a.timestamp >b.timestamp) { return a } b @@ -63,9 +65,7 @@ object CrossrefDataset { if (a == null) return b - val tb = extractTimestamp(b.json) - val ta = extractTimestamp(a.json) - if(ta >tb) { + if(a.timestamp >b.timestamp) { return a } b @@ -78,15 +78,21 @@ object CrossrefDataset { override def finish(reduction: CrossrefDT): CrossrefDT = reduction } 
- val sourcePath:String = parser.get("sourcePath") - val targetPath:String = parser.get("targetPath") + val workingPath:String = parser.get("workingPath") - val ds:Dataset[CrossrefDT] = spark.read.load(sourcePath).as[CrossrefDT] - ds.groupByKey(_.doi) + val main_ds:Dataset[CrossrefDT] = spark.read.load(s"$workingPath/crossref_ds").as[CrossrefDT] + + + val update = + spark.createDataset(spark.sparkContext.sequenceFile(s"$workingPath/index_update", classOf[IntWritable], classOf[Text]) + .map(i =>CrossrefImporter.decompressBlob(i._2.toString)) + .map(i =>to_item(i))) + + main_ds.union(update).groupByKey(_.doi) .agg(crossrefAggregator.toColumn) .map(s=>s._2) - .write.mode(SaveMode.Overwrite).save(targetPath) + .write.mode(SaveMode.Overwrite).save(s"$workingPath/crossref_ds_updated") } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index 08319058c..0272cb1a6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -34,85 +34,21 @@ object SparkMapDumpIntoOAF { implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation] implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset] - val sc = spark.sparkContext val targetPath = parser.get("targetPath") import spark.implicits._ - spark.read.load(parser.get("sourcePath")).as[CrossrefDT] .flatMap(k => Crossref2Oaf.convert(k.json)) .filter(o => o != null) .write.mode(SaveMode.Overwrite).save(s"$targetPath/mixObject") - val ds:Dataset[Oaf] = spark.read.load(s"$targetPath/mixObject").as[Oaf] - ds.filter(o => o.isInstanceOf[Publication]).map(o => o.asInstanceOf[Publication]).write.save(s"$targetPath/publication") + ds.filter(o => o.isInstanceOf[Publication]).map(o => 
o.asInstanceOf[Publication]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefPublication") - ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.save(s"$targetPath/relation") + ds.filter(o => o.isInstanceOf[Relation]).map(o => o.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefRelation") - ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.save(s"$targetPath/dataset") - - - -// -// -// -// sc.sequenceFile(parser.get("sourcePath"), classOf[IntWritable], classOf[Text]) -// .map(k => k._2.toString).map(CrossrefImporter.decompressBlob) -// .flatMap(k => Crossref2Oaf.convert(k)).saveAsObjectFile(s"${targetPath}/mixObject") -// -// val inputRDD = sc.objectFile[Oaf](s"${targetPath}/mixObject").filter(p=> p!= null) -// -// val distinctPubs:RDD[Publication] = inputRDD.filter(k => k != null && k.isInstanceOf[Publication]) -// .map(k => k.asInstanceOf[Publication]).map { p: Publication => Tuple2(p.getId, p) }.reduceByKey { case (p1: Publication, p2: Publication) => -// var r = if (p1 == null) p2 else p1 -// if (p1 != null && p2 != null) { -// if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) { -// if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp) -// r = p2 -// else -// r = p1 -// } else { -// r = if (p1.getLastupdatetimestamp == null) p2 else p1 -// } -// } -// r -// }.map(_._2) -// -// val pubs:Dataset[Publication] = spark.createDataset(distinctPubs) -// pubs.write.mode(SaveMode.Overwrite).save(s"${targetPath}/publication") -// -// -// val distincDatasets:RDD[OafDataset] = inputRDD.filter(k => k != null && k.isInstanceOf[OafDataset]) -// .map(k => k.asInstanceOf[OafDataset]).map(p => Tuple2(p.getId, p)).reduceByKey { case (p1: OafDataset, p2: OafDataset) => -// var r = if (p1 == null) p2 else p1 -// if (p1 != null && p2 != null) { -// if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) { -// if 
(p1.getLastupdatetimestamp < p2.getLastupdatetimestamp) -// r = p2 -// else -// r = p1 -// } else { -// r = if (p1.getLastupdatetimestamp == null) p2 else p1 -// } -// } -// r -// }.map(_._2) -// -// spark.createDataset(distincDatasets).write.mode(SaveMode.Overwrite).save(s"${targetPath}/dataset") -// -// -// -// val distinctRels =inputRDD.filter(k => k != null && k.isInstanceOf[Relation]) -// .map(k => k.asInstanceOf[Relation]).map(r=> (s"${r.getSource}::${r.getTarget}",r)) -// .reduceByKey { case (p1: Relation, p2: Relation) => -// if (p1 == null) p2 else p1 -// }.map(_._2) -// -// val rels: Dataset[Relation] = spark.createDataset(distinctRels) -// -// rels.write.mode(SaveMode.Overwrite).save(s"${targetPath}/relations") + ds.filter(o => o.isInstanceOf[OafDataset]).map(o => o.asInstanceOf[OafDataset]).write.mode(SaveMode.Overwrite).save(s"$targetPath/crossrefDataset") } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml index a9cc9ea3c..63c2e9ef2 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml @@ -16,88 +16,86 @@ sparkExecutorCores number of cores used by single executor - - - - + + timestamp + Timestamp for incremental Harvesting + - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.CrossrefImporter + -t${workingPath}/input/crossref/index_update + -n${nameNode} + -ts${timestamp} + + + + + + + + yarn-cluster + cluster + ExtractCrossrefToOAF + eu.dnetlib.doiboost.crossref.CrossrefDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + 
--driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + ${sparkExtraOPT} + + --workingPath/data/doiboost/input/crossref + --masteryarn-cluster + + + + + + + + + + + + + - - - - - - - - - - - - - - - - + yarn-cluster cluster - ExtractCrossrefToOAF + ConvertCrossrefToOAF eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 ${sparkExtraOPT} --sourcePath${workingPath}/input/crossref/crossref_ds - --targetPath${workingPath}/input/crossref + --targetPath${workingPath}/process/ --masteryarn-cluster - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json index 312bd0751..23c0fdabc 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_to_dataset_params.json @@ -1,6 +1,5 @@ [ - {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working dir path", "paramRequired": true}, {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index f1167b184..190c9847e 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -7,7 +7,7 @@ eu.dnetlib.dhp dhp 1.2.4-SNAPSHOT - ../ + 
../pom.xml dhp-workflows From 026ad4063304e31ea3e169cf40d93693912dccbb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 7 Dec 2020 13:50:01 +0100 Subject: [PATCH 096/108] disabled test --- .../oa/provision/XmlRecordFactoryTest.java | 33 +------------------ 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index e84f97836..992ab26e8 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -5,31 +5,23 @@ import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.io.StringReader; -import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.mockito.Mock; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; //TODO to enable it we need to update the joined_entity.json test file -//@Disabled +@Disabled public class XmlRecordFactoryTest { private static final String otherDsTypeId = 
"scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -43,27 +35,6 @@ public class XmlRecordFactoryTest { JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); assertNotNull(je); - Document doc = buildXml(je); - //// TODO specific test assertion on doc - } - - @Test - void testBologna() throws IOException, DocumentException { - final String json = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.json")); - Publication oaf = new ObjectMapper().readValue(json, Publication.class); - assertNotNull(oaf); - JoinedEntity je = new JoinedEntity(); - je.setEntity(oaf); - assertNotNull(je); - - Document doc = buildXml(je); - // TODO specific test assertion on doc - - System.out.println(doc.asXML()); - - } - - private Document buildXml(JoinedEntity je) throws DocumentException { ContextMapper contextMapper = new ContextMapper(); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, @@ -78,7 +49,5 @@ public class XmlRecordFactoryTest { assertNotNull(doc); // TODO add assertions based of values extracted from the XML record - - return doc; } } From 5de8a7276fc8018848bf5cd80c2475049b5b47ad Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Mon, 7 Dec 2020 14:56:06 +0100 Subject: [PATCH 097/108] wf to partition opendoar events --- .../broker/oa/PartitionEventsByDsIdJob.java | 28 +++++++-- .../dhp/broker/oa/od_partitions_params.json | 14 +++++ .../oozie_app/config-default.xml | 0 .../oozie_app/workflow.xml | 59 ++----------------- 4 files changed, 42 insertions(+), 59 deletions(-) create mode 100644 dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json rename dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/{partial => opendoarPartition}/oozie_app/config-default.xml (100%) rename 
dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/{partial => opendoarPartition}/oozie_app/workflow.xml (61%) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index da2c5bb78..65d5e6f94 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -4,8 +4,13 @@ package eu.dnetlib.dhp.broker.oa; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -29,15 +34,14 @@ import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; public class PartitionEventsByDsIdJob { private static final Logger log = LoggerFactory.getLogger(PartitionEventsByDsIdJob.class); - private static final String OPENDOAR_NSPREFIX = "opendoar____::"; + private static final String OPENDOAR_NSPREFIX = "10|opendoar____::"; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - PartitionEventsByDsIdJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json"))); + .toString(PartitionEventsByDsIdJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -54,13 +58,25 @@ public class PartitionEventsByDsIdJob { final String partitionPath = parser.get("workingPath") + 
"/eventsByOpendoarId"; log.info("partitionPath: {}", partitionPath); + final String opendoarIds = parser.get("opendoarIds"); + log.info("opendoarIds: {}", opendoarIds); + + final Set validOpendoarIds = new HashSet<>(); + if (!opendoarIds.trim().equals("-")) { + validOpendoarIds.addAll(Arrays.stream(opendoarIds.split(",")) + .map(String::trim) + .filter(StringUtils::isNotBlank) + .map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s)) + .collect(Collectors.toSet())); + } + runWithSparkSession(conf, isSparkSessionManaged, spark -> { ClusterUtils .readPath(spark, eventsPath, Event.class) .filter(e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) - .filter(e -> e.getMap().getTargetDatasourceId().contains(OPENDOAR_NSPREFIX)) - .limit(10000) + .filter(e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) + .filter(e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId())) .map(e -> messageFromNotification(e), Encoders.bean(ShortEventMessageWithGroupId.class)) .coalesce(1) .write() diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json new file mode 100644 index 000000000..10ba926ab --- /dev/null +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/od_partitions_params.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "o", + "paramLongName": "workingPath", + "paramDescription": "the path where the temporary data will be stored", + "paramRequired": true + }, + { + "paramName": "list", + "paramLongName": "opendoarIds", + "paramDescription": "the opendoar IDs whitelist (comma separated)", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/config-default.xml 
b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/config-default.xml rename to dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml similarity index 61% rename from dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml rename to dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml index 8bae626f1..dba3c9f73 100644 --- a/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/partial/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-broker-events/src/main/resources/eu/dnetlib/dhp/broker/oa/opendoarPartition/oozie_app/workflow.xml @@ -1,60 +1,13 @@ - + - graphInputPath - the path where the graph is stored + opendoarIds + the opendoar IDs whitelist (comma separated) workingPath the path where the the generated data will be stored - - - datasourceIdWhitelist - - - a white list (comma separeted, - for empty list) of datasource ids - - - datasourceTypeWhitelist - - - a white list (comma separeted, - for empty list) of datasource types - - - datasourceIdBlacklist - - - a black list (comma separeted, - for empty list) of datasource ids - - - esEventIndexName - the elasticsearch index name for events - - - esNotificationsIndexName - the elasticsearch index name for notifications - - - esIndexHost - the elasticsearch host - - - maxIndexedEventsForDsAndTopic - the max number of events for each couple (ds/topic) - - - brokerApiBaseUrl - the 
url of the broker service api - - - brokerDbUrl - the url of the broker database - - - brokerDbUser - the user of the broker database - - - brokerDbPassword - the password of the broker database sparkDriverMemory @@ -111,13 +64,13 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + yarn cluster @@ -134,8 +87,8 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=3840 - --graphPath${graphInputPath} --workingPath${workingPath} + --opendoarIds${opendoarIds} From 302baab67b3de2e8f186a001ebc69e712058649a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 7 Dec 2020 19:59:33 +0100 Subject: [PATCH 098/108] fixed doiboost mapping and workflows --- .../eu/dnetlib/dhp/schema/orcid/OrcidDOI.java | 29 ++++---- .../doiboost/SparkGenerateDoiBoost.scala | 2 +- .../mag/SparkImportMagIntoDataset.scala | 14 ++-- .../doiboost/mag/SparkPreProcessMAG.scala | 42 ++++++------ .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 27 +++++--- .../orcid/SparkConvertORCIDToOAF.scala | 66 +++++++++++++++---- .../intersection/oozie_app/workflow.xml | 9 +-- .../dhp/doiboost/mag/oozie_app/workflow.xml | 19 ++++-- .../doiboost/mag/preprocess_mag_params.json | 3 +- .../orcid/MappingORCIDToOAFTest.scala | 27 ++++++++ 10 files changed, 163 insertions(+), 75 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java index 11bce26c8..cf372c12a 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java @@ -1,24 +1,25 @@ + package eu.dnetlib.dhp.schema.orcid; import java.util.List; public class OrcidDOI { - private String doi; - private List authors; + private String doi; + private List authors; - public String getDoi() { - return doi; - } + public String getDoi() { + return doi; + } - public void setDoi(String doi) { - this.doi = doi; 
- } + public void setDoi(String doi) { + this.doi = doi; + } - public List getAuthors() { - return authors; - } + public List getAuthors() { + return authors; + } - public void setAuthors(List authors) { - this.authors = authors; - } + public void setAuthors(List authors) { + this.authors = authors; + } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index a29809fc0..860254527 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -62,7 +62,7 @@ object SparkGenerateDoiBoost { val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p)) fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin") - logger.info("Phase 3) Join Result with MAG") + logger.info("Phase 4) Join Result with MAG") val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p)) val magPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p)) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index f291a92f9..88fee72b7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -21,15 +21,17 @@ object SparkImportMagIntoDataset { val stream = Map( - "Affiliations" -> 
Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), + "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "Iso3166Code:string", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), + "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), + "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), + "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", 
"EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")), "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), - "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")), - "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")), + // ['FieldOfStudyId:long', 'Rank:uint', 'NormalizedName:string', 'DisplayName:string', 'MainType:string', 'Level:int', 'PaperCount:long', 'PaperFamilyCount:long', 'CitationCount:long', 'CreatedDate:DateTime'] + "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), + "Journals" -> 
Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")), "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), @@ -39,7 +41,7 @@ object SparkImportMagIntoDataset { "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")), "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")), "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")), - "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "CreatedDate:DateTime")), + "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "OnlineDate:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", 
"ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "FamilyRank:uint?", "CreatedDate:DateTime")), "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float")) ) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala index a24f0e6bb..02dc4979a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkPreProcessMAG.scala @@ -26,12 +26,15 @@ object SparkPreProcessMAG { .master(parser.get("master")).getOrCreate() val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + val targetPath = parser.get("targetPath") + import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication] implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs) logger.info("Phase 1) make uninque DOI in Papers:") - val d: Dataset[MagPapers] = spark.read.load(s"${parser.get("sourcePath")}/Papers").as[MagPapers] + val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers] // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one val result: RDD[MagPapers] = d.where(col("Doi").isNotNull) @@ -41,11 +44,12 @@ object SparkPreProcessMAG { .map(_._2) val distinctPaper: Dataset[MagPapers] = spark.createDataset(result) - distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct") + + 
distinctPaper.write.mode(SaveMode.Overwrite).save(s"$workingPath/Papers_distinct") logger.info("Phase 0) Enrich Publication with description") - val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] - pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") + val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract] + pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"$workingPath/PaperAbstract") logger.info("Phase 3) Group Author by PaperId") val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor] @@ -64,24 +68,24 @@ object SparkPreProcessMAG { } else mpa }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors")) - .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_1_paper_authors") + .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_1_paper_authors") logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors") val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal] - val papers = spark.read.load((s"${parser.get("targetPath")}/Papers_distinct")).as[MagPapers] + val papers = spark.read.load((s"$workingPath/Papers_distinct")).as[MagPapers] - val paperWithAuthors = spark.read.load(s"${parser.get("targetPath")}/merge_step_1_paper_authors").as[MagPaperWithAuthorList] + val paperWithAuthors = spark.read.load(s"$workingPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList] val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left") firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left") .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) } - .write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_2") + 
.write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_2") var magPubs: Dataset[(String, Publication)] = - spark.read.load(s"${parser.get("targetPath")}/merge_step_2").as[Publication] + spark.read.load(s"$workingPath/merge_step_2").as[Publication] .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] @@ -95,10 +99,10 @@ object SparkPreProcessMAG { .map(item => ConversionUtil.updatePubsWithConferenceInfo(item)) .write .mode(SaveMode.Overwrite) - .save(s"${parser.get("targetPath")}/merge_step_2_conference") + .save(s"$workingPath/merge_step_2_conference") - magPubs= spark.read.load(s"${parser.get("targetPath")}/merge_step_2_conference").as[Publication] + magPubs= spark.read.load(s"$workingPath/merge_step_2_conference").as[Publication] .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl] @@ -108,27 +112,27 @@ object SparkPreProcessMAG { magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left") .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) } .write.mode(SaveMode.Overwrite) - .save(s"${parser.get("targetPath")}/merge_step_3") + .save(s"$workingPath/merge_step_3") // logger.info("Phase 6) Enrich Publication with description") // val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] // pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") - val paperAbstract = spark.read.load((s"${parser.get("targetPath")}/PaperAbstract")).as[MagPaperAbstract] + val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] - magPubs = 
spark.read.load(s"${parser.get("targetPath")}/merge_step_3").as[Publication] + magPubs = spark.read.load(s"$workingPath/merge_step_3").as[Publication] .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_4") + ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") - magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_4").as[Publication] + magPubs = spark.read.load(s"$workingPath/merge_step_4").as[Publication] .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType") @@ -144,14 +148,14 @@ object SparkPreProcessMAG { .equalTo(paperField("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithSubject(item)) .write.mode(SaveMode.Overwrite) - .save(s"${parser.get("targetPath")}/mag_publication") + .save(s"$workingPath/mag_publication") - val s:RDD[Publication] = spark.read.load(s"${parser.get("targetPath")}/mag_publication").as[Publication] + val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) .map(_._2) - spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/mag_publication_u") + spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index f230c604f..f7255e559 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.orcid -import eu.dnetlib.dhp.schema.oaf.{Author, Publication} +import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} +import eu.dnetlib.dhp.schema.orcid.OrcidDOI import eu.dnetlib.doiboost.DoiBoostMappingUtil import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier} import org.apache.commons.lang.StringUtils @@ -43,16 +44,19 @@ object ORCIDToOAF { } - def convertTOOAF(input:ORCIDElement) :Publication = { - val doi = input.doi + def convertTOOAF(input:OrcidDOI) :Publication = { + val doi = input.getDoi val pub:Publication = new Publication - pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava) + pub.setPid(List(createSP(doi.toLowerCase, "doi", PID_TYPES)).asJava) pub.setDataInfo(generateDataInfo()) pub.setId(generateIdentifier(pub, doi.toLowerCase)) try{ - pub.setAuthor(input.authors.map(a=> { - generateAuthor(a.name, a.surname, a.creditName, a.oid) - }).asJava) + + val l:List[Author]= input.getAuthors.asScala.map(a=> { + generateAuthor(a.getName, a.getSurname, a.getCreditName, a.getOid) + })(collection.breakOut) + + pub.setAuthor(l.asJava) pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava) pub.setDataInfo(DoiBoostMappingUtil.generateDataInfo()) pub @@ -63,6 +67,13 @@ object ORCIDToOAF { } } + def generateOricPIDDatainfo():DataInfo = { + val di =DoiBoostMappingUtil.generateDataInfo("0.91") + di.getProvenanceaction.setClassid("sysimport:crosswalk:entityregistry") + di.getProvenanceaction.setClassname("Harvested") + di + } + def generateAuthor(given: String, family: String, fullName:String, orcid: String): Author = { val a = new 
Author a.setName(given) @@ -72,7 +83,7 @@ object ORCIDToOAF { else a.setFullname(s"$given $family") if (StringUtils.isNotBlank(orcid)) - a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava) + a.setPid(List(createSP(orcid, ORCID, PID_TYPES, generateOricPIDDatainfo())).asJava) a } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 1cd9ba4d4..f1c7c58b4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -1,21 +1,72 @@ package eu.dnetlib.doiboost.orcid +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.dhp.schema.orcid.OrcidDOI import eu.dnetlib.doiboost.mag.ConversionUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD +import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { + val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + + def getPublicationAggregator(): Aggregator[(String, Publication), Publication, Publication] = new Aggregator[(String, Publication), Publication, Publication]{ + + override def zero: Publication = new Publication() + + override def reduce(b: Publication, a: (String, Publication)): Publication = { + b.mergeFrom(a._2) + b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor)) + if (b.getId == null) + b.setId(a._2.getId) + b + } + override def merge(wx: Publication, wy: Publication): Publication 
= { + wx.mergeFrom(wy) + wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor)) + if(wx.getId == null && wy.getId.nonEmpty) + wx.setId(wy.getId) + wx + } + override def finish(reduction: Publication): Publication = reduction + + override def bufferEncoder: Encoder[Publication] = + Encoders.kryo(classOf[Publication]) + + override def outputEncoder: Encoder[Publication] = + Encoders.kryo(classOf[Publication]) + } + +def run(spark:SparkSession,sourcePath:String, targetPath:String):Unit = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + implicit val mapOrcid: Encoder[OrcidDOI] = Encoders.kryo[OrcidDOI] + implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs) + + val mapper = new ObjectMapper() + mapper.getDeserializationConfig.withFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) + + val dataset:Dataset[OrcidDOI] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => mapper.readValue(s,classOf[OrcidDOI]))) + + logger.info("Converting ORCID to OAF") + dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null) + .map(d => (d.getId, d)) + .groupByKey(_._1)(Encoders.STRING) + .agg(getPublicationAggregator().toColumn) + .map(p => p._2) + .write.mode(SaveMode.Overwrite).save(targetPath) +} def main(args: Array[String]): Unit = { - val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) parser.parseArgument(args) @@ -26,19 +77,12 @@ object SparkConvertORCIDToOAF { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = 
Encoders.tuple(Encoders.STRING, mapEncoderPubs) - import spark.implicits._ + + val sourcePath = parser.get("sourcePath") val targetPath = parser.get("targetPath") - val dataset:Dataset[ORCIDElement] = spark.read.json(sourcePath).as[ORCIDElement] + run(spark, sourcePath, targetPath) - - logger.info("Converting ORCID to OAF") - val d:RDD[Publication] = dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null).map(p=>(p.getId,p)).rdd.reduceByKey(ConversionUtil.mergePublication) - .map(_._2) - - spark.createDataset(d).as[Publication].write.mode(SaveMode.Overwrite).save(targetPath) } } diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml index e35f88abd..dcde62c9d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml @@ -39,14 +39,7 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - + diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml index 2277b79b0..9d19dddc7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml @@ -8,6 +8,10 @@ targetPath the working dir base path + + workingPath + the working dir base path + sparkDriverMemory memory for driver process @@ -31,10 +35,10 @@ - - + + - + @@ -52,10 +56,10 @@ ${sparkExtraOPT} --sourcePath${sourcePath} - --targetPath${targetPath} + --targetPath${workingPath} --masteryarn-cluster - + @@ -65,7 +69,7 @@ yarn-cluster cluster - 
Convert Mag to Dataset + Convert Mag to OAF Dataset eu.dnetlib.doiboost.mag.SparkPreProcessMAG dhp-doiboost-${projectVersion}.jar @@ -75,7 +79,8 @@ --conf spark.sql.shuffle.partitions=3840 ${sparkExtraOPT} - --sourcePath${sourcePath} + --sourcePath${workingPath} + --workingPath${workingPath}/process --targetPath${targetPath} --masteryarn-cluster diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json index bf0b80f69..d45f7269f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json @@ -1,6 +1,7 @@ [ {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target dir path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working dir path", "paramRequired": true}, {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index 5b8240942..0222b393d 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -1,5 +1,8 @@ package eu.dnetlib.doiboost.orcid +import eu.dnetlib.dhp.schema.oaf.Publication +import 
eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF.getClass +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.codehaus.jackson.map.ObjectMapper import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test @@ -21,6 +24,30 @@ class MappingORCIDToOAFTest { }) } +// @Test +// def testOAFConvert():Unit ={ +// +// val spark: SparkSession = +// SparkSession +// .builder() +// .appName(getClass.getSimpleName) +// .master("local[*]").getOrCreate() +// +// +// SparkConvertORCIDToOAF.run( spark,"/Users/sandro/Downloads/orcid", "/Users/sandro/Downloads/orcid_oaf") +// implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] +// +// val df = spark.read.load("/Users/sandro/Downloads/orcid_oaf").as[Publication] +// println(df.first.getId) +// println(mapper.writeValueAsString(df.first())) +// +// +// +// +// } + + + From 197f286fa476f2670a268ca08b7db7ed4154734c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 7 Dec 2020 21:52:17 +0100 Subject: [PATCH 099/108] removed duplicated dependency (org.apache.httpcomponents:httpclent --- pom.xml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index d06bdbe20..a2e2587b3 100644 --- a/pom.xml +++ b/pom.xml @@ -278,12 +278,12 @@ org.apache.httpcomponents httpclient - 4.5.3 + ${org.apache.httpcomponents.version} org.apache.httpcomponents httpmime - 4.5.3 + ${org.apache.httpcomponents.version} org.noggit @@ -484,12 +484,6 @@ ${common.text.version} - - org.apache.httpcomponents - httpclient - ${org.apache.httpcomponents.version} - - @@ -719,7 +713,7 @@ 1.8 4.1.2 1.8 - 4.3.4 + 4.5.3 4.0.1 From 2fcc24b36e04b6ee52ab148d7ecaaa885ea25576 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 7 Dec 2020 21:52:32 +0100 Subject: [PATCH 100/108] code formatting --- .../eu/dnetlib/dhp/schema/orcid/OrcidDOI.java | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git 
a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java index 11bce26c8..cf372c12a 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java @@ -1,24 +1,25 @@ + package eu.dnetlib.dhp.schema.orcid; import java.util.List; public class OrcidDOI { - private String doi; - private List authors; + private String doi; + private List authors; - public String getDoi() { - return doi; - } + public String getDoi() { + return doi; + } - public void setDoi(String doi) { - this.doi = doi; - } + public void setDoi(String doi) { + this.doi = doi; + } - public List getAuthors() { - return authors; - } + public List getAuthors() { + return authors; + } - public void setAuthors(List authors) { - this.authors = authors; - } + public void setAuthors(List authors) { + this.authors = authors; + } } From fba11eef2ab857ab16810c4f522be2d7e9b06127 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 7 Dec 2020 21:53:13 +0100 Subject: [PATCH 101/108] cleanup --- .../broker/oa/PartitionEventsByDsIdJob.java | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index 65d5e6f94..e9644122f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -18,6 +18,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; 
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.slf4j.Logger; @@ -40,8 +42,9 @@ public class PartitionEventsByDsIdJob { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(PartitionEventsByDsIdJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json"))); + .toString( + PartitionEventsByDsIdJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -63,21 +66,27 @@ public class PartitionEventsByDsIdJob { final Set validOpendoarIds = new HashSet<>(); if (!opendoarIds.trim().equals("-")) { - validOpendoarIds.addAll(Arrays.stream(opendoarIds.split(",")) - .map(String::trim) - .filter(StringUtils::isNotBlank) - .map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s)) - .collect(Collectors.toSet())); + validOpendoarIds + .addAll( + Arrays + .stream(opendoarIds.split(",")) + .map(String::trim) + .filter(StringUtils::isNotBlank) + .map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s)) + .collect(Collectors.toSet())); } + log.info("validOpendoarIds: {}", validOpendoarIds); runWithSparkSession(conf, isSparkSessionManaged, spark -> { ClusterUtils .readPath(spark, eventsPath, Event.class) - .filter(e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) - .filter(e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) - .filter(e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId())) - .map(e -> messageFromNotification(e), Encoders.bean(ShortEventMessageWithGroupId.class)) + .filter((FilterFunction) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) + .filter((FilterFunction) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) + .filter((FilterFunction) e -> validOpendoarIds.contains(e.getMap().getTargetDatasourceId())) + .map( + (MapFunction) e -> messageFromNotification(e), + 
Encoders.bean(ShortEventMessageWithGroupId.class)) .coalesce(1) .write() .partitionBy("group") From 620e1307a39e54e3ee5d3da50a5675355f36d08e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 9 Dec 2020 10:30:47 +0100 Subject: [PATCH 102/108] indentation --- .../broker/oa/PartitionEventsByDsIdJob.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index 65d5e6f94..efb7a035c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -40,8 +40,9 @@ public class PartitionEventsByDsIdJob { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(PartitionEventsByDsIdJob.class - .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json"))); + .toString( + PartitionEventsByDsIdJob.class + .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/od_partitions_params.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -63,11 +64,14 @@ public class PartitionEventsByDsIdJob { final Set validOpendoarIds = new HashSet<>(); if (!opendoarIds.trim().equals("-")) { - validOpendoarIds.addAll(Arrays.stream(opendoarIds.split(",")) - .map(String::trim) - .filter(StringUtils::isNotBlank) - .map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s)) - .collect(Collectors.toSet())); + validOpendoarIds + .addAll( + Arrays + .stream(opendoarIds.split(",")) + .map(String::trim) + .filter(StringUtils::isNotBlank) + .map(s -> OPENDOAR_NSPREFIX + DigestUtils.md5Hex(s)) + .collect(Collectors.toSet())); } runWithSparkSession(conf, isSparkSessionManaged, spark -> { From 370a5e650b0c9bdfac969f4d952e8491cd805694 Mon Sep 
17 00:00:00 2001 From: "michele.artini" Date: Wed, 9 Dec 2020 11:18:26 +0100 Subject: [PATCH 103/108] validation attributes in resultProject relations --- .../raw/AbstractMdRecordToOafMapper.java | 168 ++++++++---------- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 9 +- .../dnetlib/dhp/oa/graph/raw/oaf_record.xml | 2 +- .../dnetlib/dhp/oa/graph/raw/odf_dataset.xml | 2 +- 4 files changed, 88 insertions(+), 93 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 95dd1e1ca..4e60e9fdd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -70,10 +70,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/"; - protected static final Qualifier ORCID_PID_TYPE = qualifier( - "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier( - "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -87,8 +85,7 @@ public abstract class AbstractMdRecordToOafMapper { 
nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( - "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible) { this.vocs = vocs; @@ -100,26 +97,20 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText( - xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + .parseText(xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { - return null; - } + if (collectedFrom == null) { return null; } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? 
collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { - return null; - } + if (hostedBy == null) { return null; } final DataInfo info = prepareDataInfo(doc, invisible); final long lastUpdateTimestamp = new Date().getTime(); @@ -161,9 +152,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { - return null; - } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -179,47 +168,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - 
s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + 
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -244,25 +233,37 @@ public abstract class AbstractMdRecordToOafMapper { final String originalId = ((Node) o).getText(); + final String validationdDate = ((Node) o).valueOf("@validationDate"); + if (StringUtils.isNotBlank(originalId)) { final String projectId = createOpenaireId(40, originalId, true); res - .add( - getRelation( - docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelationWithValidationDate(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp, validationdDate)); res - .add( - getRelation( - projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, - lastUpdateTimestamp)); + .add(getRelationWithValidationDate(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp, validationdDate)); } } return res; } + protected Relation getRelationWithValidationDate(final String source, + final String target, + final String relType, + final String subRelType, + final String relClass, + final KeyValue collectedFrom, + final DataInfo info, + final long lastUpdateTimestamp, + final String validationDate) { + + final Relation r = getRelation(source, target, relType, subRelType, relClass, collectedFrom, info, lastUpdateTimestamp); + r.setValidated(StringUtils.isNotBlank(validationDate)); + 
r.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null); + return r; + } + protected Relation getRelation(final String source, final String target, final String relType, @@ -455,9 +456,7 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { - return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); - } + if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } } return null; } @@ -466,9 +465,7 @@ public abstract class AbstractMdRecordToOafMapper { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); if (n != null) { final String id = n.valueOf("./*[local-name()='identifier']"); - if (StringUtils.isNotBlank(id)) { - return id; - } + if (StringUtils.isNotBlank(id)) { return id; } } return doc.valueOf("//*[local-name()='header']/*[local-name()='identifier']"); @@ -537,10 +534,7 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add( - structuredProperty( - n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), - n.valueOf("@schemename"), info)); + .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); } return res; } @@ -548,9 +542,7 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { - return null; - } + if (n == null) { return null; } final String identifier = n.valueOf("./*[local-name()='identifier']"); 
final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -565,9 +557,7 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { - return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); - } + if (n == null) { return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -579,9 +569,7 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo( - deletedbyinference, inferenceprovenance, inferred, invisible, - qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo(deletedbyinference, inferenceprovenance, inferred, invisible, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 46cb1a535..a0348f8ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -141,7 +141,10 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelClass())); assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType())); - + assertTrue(r1.getValidated()); + assertTrue(r2.getValidated()); + assertEquals(r1.getValidationDate(), "2020-01-01"); + 
assertEquals(r2.getValidationDate(), "2020-01-01"); // System.out.println(new ObjectMapper().writeValueAsString(p)); // System.out.println(new ObjectMapper().writeValueAsString(r1)); // System.out.println(new ObjectMapper().writeValueAsString(r2)); @@ -246,6 +249,10 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelClass())); assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType())); + assertTrue(r1.getValidated()); + assertTrue(r2.getValidated()); + assertEquals(r1.getValidationDate(), "2020-01-01"); + assertEquals(r2.getValidationDate(), "2020-01-01"); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml index 2c6c98ebb..f4b0c477f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml @@ -51,7 +51,7 @@ 0001 2017-01-01 - corda_______::226852 + corda_______::226852 OPEN und - corda_______::226852 + corda_______::226852 0001s From 5f21a356fd61432713af2431ff36987d8d0b2e40 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 9 Dec 2020 11:24:30 +0100 Subject: [PATCH 104/108] reindent --- .../raw/AbstractMdRecordToOafMapper.java | 153 +++++++++++------- 1 file changed, 92 insertions(+), 61 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 4e60e9fdd..267c6026d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -70,8 +70,10 
@@ public abstract class AbstractMdRecordToOafMapper { protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/"; - protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); - protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier ORCID_PID_TYPE = qualifier( + "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final Qualifier MAG_PID_TYPE = qualifier( + "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Map nsContext = new HashMap<>(); @@ -85,7 +87,8 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } - protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); + protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( + "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible) { this.vocs = vocs; @@ -97,20 +100,26 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText(xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, 
DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - if (collectedFrom == null) { return null; } + if (collectedFrom == null) { + return null; + } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - if (hostedBy == null) { return null; } + if (hostedBy == null) { + return null; + } final DataInfo info = prepareDataInfo(doc, invisible); final long lastUpdateTimestamp = new Date().getTime(); @@ -152,7 +161,9 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; } + if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + return null; + } return keyValue(createOpenaireId(10, dsId, true), dsName); } @@ -168,47 +179,47 @@ public abstract class AbstractMdRecordToOafMapper { final List oafs = new ArrayList<>(); switch (type.toLowerCase()) { - case "publication": - final Publication p = new Publication(); - populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); - p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); - p.setJournal(prepareJournal(doc, info)); - oafs.add(p); - break; - case "dataset": - final Dataset d = new Dataset(); - populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); - d.setResulttype(DATASET_DEFAULT_RESULTTYPE); - d.setStoragedate(prepareDatasetStorageDate(doc, info)); - d.setDevice(prepareDatasetDevice(doc, info)); - 
d.setSize(prepareDatasetSize(doc, info)); - d.setVersion(prepareDatasetVersion(doc, info)); - d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); - d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); - d.setGeolocation(prepareDatasetGeoLocations(doc, info)); - oafs.add(d); - break; - case "software": - final Software s = new Software(); - populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); - s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); - s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); - s.setLicense(prepareSoftwareLicenses(doc, info)); - s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); - s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); - oafs.add(s); - break; - case "": - case "otherresearchproducts": - default: - final OtherResearchProduct o = new OtherResearchProduct(); - populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); - o.setResulttype(ORP_DEFAULT_RESULTTYPE); - o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); - o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); - o.setTool(prepareOtherResearchProductTools(doc, info)); - oafs.add(o); - break; + case "publication": + final Publication p = new Publication(); + populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); + p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); + p.setJournal(prepareJournal(doc, info)); + oafs.add(p); + break; + case "dataset": + final Dataset d = new Dataset(); + populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); + d.setResulttype(DATASET_DEFAULT_RESULTTYPE); + d.setStoragedate(prepareDatasetStorageDate(doc, info)); + d.setDevice(prepareDatasetDevice(doc, info)); + d.setSize(prepareDatasetSize(doc, info)); + d.setVersion(prepareDatasetVersion(doc, info)); + d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, 
info)); + d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); + d.setGeolocation(prepareDatasetGeoLocations(doc, info)); + oafs.add(d); + break; + case "software": + final Software s = new Software(); + populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); + s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); + s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); + s.setLicense(prepareSoftwareLicenses(doc, info)); + s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); + s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); + oafs.add(s); + break; + case "": + case "otherresearchproducts": + default: + final OtherResearchProduct o = new OtherResearchProduct(); + populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); + o.setResulttype(ORP_DEFAULT_RESULTTYPE); + o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); + o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); + o.setTool(prepareOtherResearchProductTools(doc, info)); + oafs.add(o); + break; } if (!oafs.isEmpty()) { @@ -239,9 +250,15 @@ public abstract class AbstractMdRecordToOafMapper { final String projectId = createOpenaireId(40, originalId, true); res - .add(getRelationWithValidationDate(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp, validationdDate)); + .add( + getRelationWithValidationDate( + docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, + lastUpdateTimestamp, validationdDate)); res - .add(getRelationWithValidationDate(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp, validationdDate)); + .add( + getRelationWithValidationDate( + projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, + lastUpdateTimestamp, validationdDate)); } } @@ -258,7 +275,8 @@ public abstract class 
AbstractMdRecordToOafMapper { final long lastUpdateTimestamp, final String validationDate) { - final Relation r = getRelation(source, target, relType, subRelType, relClass, collectedFrom, info, lastUpdateTimestamp); + final Relation r = getRelation( + source, target, relType, subRelType, relClass, collectedFrom, info, lastUpdateTimestamp); r.setValidated(StringUtils.isNotBlank(validationDate)); r.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null); return r; @@ -456,7 +474,9 @@ public abstract class AbstractMdRecordToOafMapper { final String sp = n.valueOf("@sp"); final String vol = n.valueOf("@vol"); final String edition = n.valueOf("@edition"); - if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); } + if (StringUtils.isNotBlank(name)) { + return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); + } } return null; } @@ -465,7 +485,9 @@ public abstract class AbstractMdRecordToOafMapper { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); if (n != null) { final String id = n.valueOf("./*[local-name()='identifier']"); - if (StringUtils.isNotBlank(id)) { return id; } + if (StringUtils.isNotBlank(id)) { + return id; + } } return doc.valueOf("//*[local-name()='header']/*[local-name()='identifier']"); @@ -534,7 +556,10 @@ public abstract class AbstractMdRecordToOafMapper { for (final Object o : node.selectNodes(xpath)) { final Node n = (Node) o; res - .add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info)); + .add( + structuredProperty( + n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), + n.valueOf("@schemename"), info)); } return res; } @@ -542,7 +567,9 @@ public abstract class AbstractMdRecordToOafMapper { protected OAIProvenance 
prepareOAIprovenance(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); - if (n == null) { return null; } + if (n == null) { + return null; + } final String identifier = n.valueOf("./*[local-name()='identifier']"); final String baseURL = n.valueOf("./*[local-name()='baseURL']"); @@ -557,7 +584,9 @@ public abstract class AbstractMdRecordToOafMapper { protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) { final Node n = doc.selectSingleNode("//oaf:datainfo"); - if (n == null) { return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } + if (n == null) { + return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); @@ -569,7 +598,9 @@ public abstract class AbstractMdRecordToOafMapper { final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String trust = n.valueOf("./oaf:trust"); - return dataInfo(deletedbyinference, inferenceprovenance, inferred, invisible, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); + return dataInfo( + deletedbyinference, inferenceprovenance, inferred, invisible, + qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } protected Field prepareField(final Node node, final String xpath, final DataInfo info) { From 1bc9adc10d79455cd910249fcc431ef23b7a214f Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 9 Dec 2020 16:18:37 +0100 Subject: [PATCH 105/108] default trust for validated rels --- .../oa/graph/raw/AbstractMdRecordToOafMapper.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 267c6026d..cccf15398 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -75,6 +75,8 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Qualifier MAG_PID_TYPE = qualifier( "MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES); + protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999"; + protected static final Map nsContext = new HashMap<>(); static { @@ -279,6 +281,16 @@ public abstract class AbstractMdRecordToOafMapper { source, target, relType, subRelType, relClass, collectedFrom, info, lastUpdateTimestamp); r.setValidated(StringUtils.isNotBlank(validationDate)); r.setValidationDate(StringUtils.isNotBlank(validationDate) ? 
validationDate : null); + + if (StringUtils.isNotBlank(validationDate)) { + r.setValidated(true); + r.setValidationDate(validationDate); + r.getDataInfo().setTrust(DEFAULT_TRUST_FOR_VALIDATED_RELS); + } else { + r.setValidated(false); + r.setValidationDate(null); + } + return r; } From 212b52614fefa9a08dc3fc08c8325a0c5a0dc5b5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 9 Dec 2020 16:59:02 +0100 Subject: [PATCH 106/108] added graph mapper versus community result without context and project in common to be used for the doiboost mapping --- .../java/eu/dnetlib/dhp/common/Constants.java | 44 ++ .../dnetlib/dhp/common/GraphResultMapper.java | 415 ++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java new file mode 100644 index 000000000..c9d15ff76 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.common; + +import com.google.common.collect.Maps; + +import java.util.Map; + +public class Constants { + + public static final Map accessRightsCoarMap = Maps.newHashMap(); + public static final Map coarCodeLabelMap = Maps.newHashMap(); + + public static final String INFERRED = "Inferred by OpenAIRE"; + + public static final String HARVESTED = "Harvested"; + public static final String DEFAULT_TRUST = "0.9"; + public static final String USER_CLAIM = "Linked by user";; + + public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + + public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/"; + + public static String RESEARCH_COMMUNITY = "Research Community"; + + public static String 
RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; + + public static String ORCID = "orcid"; + + static { + accessRightsCoarMap.put("OPEN", "c_abf2"); + accessRightsCoarMap.put("RESTRICTED", "c_16ec"); + accessRightsCoarMap.put("OPEN SOURCE", "c_abf2"); + accessRightsCoarMap.put("CLOSED", "c_14cb"); + accessRightsCoarMap.put("EMBARGO", "c_f1cf"); + } + + static { + coarCodeLabelMap.put("c_abf2", "OPEN"); + coarCodeLabelMap.put("c_16ec", "RESTRICTED"); + coarCodeLabelMap.put("c_14cb", "CLOSED"); + coarCodeLabelMap.put("c_f1cf", "EMBARGO"); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java new file mode 100644 index 000000000..5e37f15db --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -0,0 +1,415 @@ + +package eu.dnetlib.dhp.common; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.dump.oaf.*; +import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance; +import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +public class GraphResultMapper implements Serializable { + + public static Result map( + E in) { + + CommunityResult out = new CommunityResult(); + + eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; + Optional ort = Optional.ofNullable(input.getResulttype()); + if (ort.isPresent()) { + switch (ort.get().getClassid()) { + case "publication": + Optional journal = Optional + .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal()); + if (journal.isPresent()) { + Journal j = journal.get(); + Container c = new Container(); + 
c.setConferencedate(j.getConferencedate()); + c.setConferenceplace(j.getConferenceplace()); + c.setEdition(j.getEdition()); + c.setEp(j.getEp()); + c.setIss(j.getIss()); + c.setIssnLinking(j.getIssnLinking()); + c.setIssnOnline(j.getIssnOnline()); + c.setIssnPrinted(j.getIssnPrinted()); + c.setName(j.getName()); + c.setSp(j.getSp()); + c.setVol(j.getVol()); + out.setContainer(c); + out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname()); + } + break; + case "dataset": + eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input; + Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue())); + Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue())); + + out + .setGeolocation( + Optional + .ofNullable(id.getGeolocation()) + .map( + igl -> igl + .stream() + .filter(Objects::nonNull) + .map(gli -> { + GeoLocation gl = new GeoLocation(); + gl.setBox(gli.getBox()); + gl.setPlace(gli.getPlace()); + gl.setPoint(gli.getPoint()); + return gl; + }) + .collect(Collectors.toList())) + .orElse(null)); + + out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname()); + break; + case "software": + + eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input; + Optional + .ofNullable(is.getCodeRepositoryUrl()) + .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue())); + Optional + .ofNullable(is.getDocumentationUrl()) + .ifPresent( + value -> out + .setDocumentationUrl( + value + .stream() + .map(v -> v.getValue()) + .collect(Collectors.toList()))); + + Optional + .ofNullable(is.getProgrammingLanguage()) + .ifPresent(value -> out.setProgrammingLanguage(value.getClassid())); + + out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname()); + break; + case "other": + + eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input; + out + .setContactgroup( + Optional + .ofNullable(ir.getContactgroup()) + 
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .orElse(null)); + + out + .setContactperson( + Optional + .ofNullable(ir.getContactperson()) + .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .orElse(null)); + out + .setTool( + Optional + .ofNullable(ir.getTool()) + .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .orElse(null)); + + out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); + + break; + } + + Optional + .ofNullable(input.getAuthor()) + .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + + // I do not map Access Right UNKNOWN or OTHER + + Optional oar = Optional.ofNullable(input.getBestaccessright()); + if (oar.isPresent()) { + if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { + String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); + out + .setBestaccessright( + AccessRight + .newInstance( + code, + Constants.coarCodeLabelMap.get(code), + Constants.COAR_ACCESS_RIGHT_SCHEMA)); + } + } + + final List contributorList = new ArrayList<>(); + Optional + .ofNullable(input.getContributor()) + .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); + out.setContributor(contributorList); + + Optional + .ofNullable(input.getCountry()) + .ifPresent( + value -> out + .setCountry( + value + .stream() + .map( + c -> { + if (c.getClassid().equals((ModelConstants.UNKNOWN))) { + return null; + } + Country country = new Country(); + country.setCode(c.getClassid()); + country.setLabel(c.getClassname()); + Optional + .ofNullable(c.getDataInfo()) + .ifPresent( + provenance -> country + .setProvenance( + Provenance + .newInstance( + provenance + .getProvenanceaction() + .getClassname(), + c.getDataInfo().getTrust()))); + return country; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()))); + + final List coverageList = 
new ArrayList<>(); + Optional + .ofNullable(input.getCoverage()) + .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); + out.setCoverage(coverageList); + + out.setDateofcollection(input.getDateofcollection()); + + final List descriptionList = new ArrayList<>(); + Optional + .ofNullable(input.getDescription()) + .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); + out.setDescription(descriptionList); + Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); + if (oStr.isPresent()) { + out.setEmbargoenddate(oStr.get().getValue()); + } + + final List formatList = new ArrayList<>(); + Optional + .ofNullable(input.getFormat()) + .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); + out.setFormat(formatList); + out.setId(input.getId()); + out.setOriginalId(input.getOriginalId()); + + Optional> oInst = Optional + .ofNullable(input.getInstance()); + + if (oInst.isPresent()) { + out + .setInstance( + oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); + + } + + Optional oL = Optional.ofNullable(input.getLanguage()); + if (oL.isPresent()) { + eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); + out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname())); + } + Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); + if (oLong.isPresent()) { + out.setLastupdatetimestamp(oLong.get()); + } + Optional> otitle = Optional.ofNullable(input.getTitle()); + if (otitle.isPresent()) { + List iTitle = otitle + .get() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) + .collect(Collectors.toList()); + if (iTitle.size() > 0) { + out.setMaintitle(iTitle.get(0).getValue()); + } + + iTitle = otitle + .get() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) + .collect(Collectors.toList()); + if (iTitle.size() > 0) { + 
out.setSubtitle(iTitle.get(0).getValue()); + } + + } + + List pids = new ArrayList<>(); + Optional + .ofNullable(input.getPid()) + .ifPresent( + value -> value + .stream() + .forEach( + p -> pids + .add( + ControlledField + .newInstance(p.getQualifier().getClassid(), p.getValue())))); + out.setPid(pids); + oStr = Optional.ofNullable(input.getDateofacceptance()); + if (oStr.isPresent()) { + out.setPublicationdate(oStr.get().getValue()); + } + oStr = Optional.ofNullable(input.getPublisher()); + if (oStr.isPresent()) { + out.setPublisher(oStr.get().getValue()); + } + + List sourceList = new ArrayList<>(); + Optional + .ofNullable(input.getSource()) + .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue()))); + // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList())); + List subjectList = new ArrayList<>(); + Optional + .ofNullable(input.getSubject()) + .ifPresent( + value -> value + .forEach(s -> subjectList.add(getSubject(s)))); + + out.setSubjects(subjectList); + + out.setType(input.getResulttype().getClassid()); + } + + out + .setCollectedfrom( + input + .getCollectedfrom() + .stream() + .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())) + .collect(Collectors.toList())); + + + return out; + + } + + + private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { + CommunityInstance instance = new CommunityInstance(); + + setCommonValue(i, instance); + + instance + .setCollectedfrom( + KeyValue + .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); + + instance + .setHostedby( + KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); + + return instance; + + } + + private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { + Optional opAr = Optional + .ofNullable(i.getAccessright()); + if (opAr.isPresent()) { + if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { + String code = 
Constants.accessRightsCoarMap.get(opAr.get().getClassid()); + instance + .setAccessright( + AccessRight + .newInstance( + code, + Constants.coarCodeLabelMap.get(code), + Constants.COAR_ACCESS_RIGHT_SCHEMA)); + } + } + + Optional + .ofNullable(i.getLicense()) + .ifPresent(value -> instance.setLicense(value.getValue())); + Optional + .ofNullable(i.getDateofacceptance()) + .ifPresent(value -> instance.setPublicationdate(value.getValue())); + Optional + .ofNullable(i.getRefereed()) + .ifPresent(value -> instance.setRefereed(value.getClassname())); + Optional + .ofNullable(i.getInstancetype()) + .ifPresent(value -> instance.setType(value.getClassname())); + Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + + } + + + private static Subject getSubject(StructuredProperty s) { + Subject subject = new Subject(); + subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue())); + Optional di = Optional.ofNullable(s.getDataInfo()); + if (di.isPresent()) { + Provenance p = new Provenance(); + p.setProvenance(di.get().getProvenanceaction().getClassname()); + p.setTrust(di.get().getTrust()); + subject.setProvenance(p); + } + + return subject; + } + + private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { + Author a = new Author(); + a.setFullname(oa.getFullname()); + a.setName(oa.getName()); + a.setSurname(oa.getSurname()); + a.setRank(oa.getRank()); + + Optional> oPids = Optional + .ofNullable(oa.getPid()); + if (oPids.isPresent()) { + Pid pid = getOrcid(oPids.get()); + if (pid != null) { + a.setPid(pid); + } + } + + return a; + } + + private static Pid getOrcid(List p) { + for (StructuredProperty pid : p) { + if (pid.getQualifier().getClassid().equals(Constants.ORCID)) { + Optional di = Optional.ofNullable(pid.getDataInfo()); + if (di.isPresent()) { + return Pid + .newInstance( + ControlledField + .newInstance( + pid.getQualifier().getClassid(), + pid.getValue()), + Provenance + .newInstance( + 
di.get().getProvenanceaction().getClassname(), + di.get().getTrust())); + } else { + return Pid + .newInstance( + ControlledField + .newInstance( + pid.getQualifier().getClassid(), + pid.getValue()) + + ); + } + + } + } + return null; + } + +} From 6fbc67a9595564c5df9976e0751016c4843317c7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 9 Dec 2020 17:10:20 +0100 Subject: [PATCH 107/108] using ModelConstant.ORCID and removing not used constants --- .../main/java/eu/dnetlib/dhp/common/Constants.java | 14 -------------- .../eu/dnetlib/dhp/common/GraphResultMapper.java | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index c9d15ff76..6a986e145 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -10,22 +10,8 @@ public class Constants { public static final Map accessRightsCoarMap = Maps.newHashMap(); public static final Map coarCodeLabelMap = Maps.newHashMap(); - public static final String INFERRED = "Inferred by OpenAIRE"; - - public static final String HARVESTED = "Harvested"; - public static final String DEFAULT_TRUST = "0.9"; - public static final String USER_CLAIM = "Linked by user";; - public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; - public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/"; - - public static String RESEARCH_COMMUNITY = "Research Community"; - - public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; - - public static String ORCID = "orcid"; - static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 5e37f15db..3956410d5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -383,7 +383,7 @@ public class GraphResultMapper implements Serializable { private static Pid getOrcid(List p) { for (StructuredProperty pid : p) { - if (pid.getQualifier().getClassid().equals(Constants.ORCID)) { + if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) { Optional di = Optional.ofNullable(pid.getDataInfo()); if (di.isPresent()) { return Pid From db4e400a0b77e29f3ccc9e48286024263465925b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 9 Dec 2020 18:01:45 +0100 Subject: [PATCH 108/108] introduced Oaf.mergeFrom method to allow merging of dataInfo(s), with prevalence based on datainfo.trust --- .../java/eu/dnetlib/dhp/schema/oaf/Oaf.java | 38 +++++++++++++++++-- .../eu/dnetlib/dhp/schema/oaf/OafEntity.java | 6 +-- .../eu/dnetlib/dhp/schema/oaf/Relation.java | 14 +------ 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java index 3496492e8..fef32c0c6 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java @@ -2,8 +2,12 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; +import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; public abstract class Oaf implements Serializable { @@ -40,9 +44,35 @@ public abstract class Oaf implements Serializable { this.lastupdatetimestamp = lastupdatetimestamp; } - public void mergeOAFDataInfo(Oaf e) { - if (e.getDataInfo() != null && compareTrust(this, e) < 0) - dataInfo = e.getDataInfo(); + public 
void mergeFrom(Oaf o) { + if (Objects.isNull(o)) { + return; + } + setCollectedfrom( + Stream + .concat( + Optional + .ofNullable(getCollectedfrom()) + .map(Collection::stream) + .orElse(Stream.empty()), + Optional + .ofNullable(o.getCollectedfrom()) + .map(Collection::stream) + .orElse(Stream.empty())) + .distinct() // relies on KeyValue.equals + .collect(Collectors.toList())); + + mergeOAFDataInfo(o); + + setLastupdatetimestamp( + Math.max( + Optional.ofNullable(getLastupdatetimestamp()).orElse(0L), + Optional.ofNullable(o.getLastupdatetimestamp()).orElse(0L))); + } + + public void mergeOAFDataInfo(Oaf o) { + if (o.getDataInfo() != null && compareTrust(this, o) < 0) + dataInfo = o.getDataInfo(); } protected String extractTrust(Oaf e) { @@ -62,7 +92,7 @@ public abstract class Oaf implements Serializable { if (o == null || getClass() != o.getClass()) return false; Oaf oaf = (Oaf) o; - return Objects.equals(dataInfo, oaf.dataInfo) + return Objects.equals(getDataInfo(), oaf.getDataInfo()) && Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp); } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java index 2823ee49d..17c3e6bdd 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java @@ -78,14 +78,10 @@ public abstract class OafEntity extends Oaf implements Serializable { } public void mergeFrom(OafEntity e) { - - if (e == null) - return; + super.mergeFrom(e); originalId = mergeLists(originalId, e.getOriginalId()); - collectedfrom = mergeLists(collectedfrom, e.getCollectedfrom()); - pid = mergeLists(pid, e.getPid()); if (e.getDateofcollection() != null && compareTrust(this, e) < 0) diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java index 17a50d7ac..0de34dbec 100644 --- 
a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java @@ -130,19 +130,7 @@ public class Relation extends Oaf { Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal"); checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal"); - setCollectedfrom( - Stream - .concat( - Optional - .ofNullable(getCollectedfrom()) - .map(Collection::stream) - .orElse(Stream.empty()), - Optional - .ofNullable(r.getCollectedfrom()) - .map(Collection::stream) - .orElse(Stream.empty())) - .distinct() // relies on KeyValue.equals - .collect(Collectors.toList())); + super.mergeFrom(r); } @Override